1 General information

This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.

We analyze a dataset published on Kaggle. It refers to french employment, salaries, population per town. The aim is to evaluate equality/inequalities in France, and geographical distribution of business according to their size.

Such data are collected by the INSEE. Information regarding the number of firms in every french town, categorized by size can be found here. Information about salaries around french town per job categories, age and sex (expressed in average net amount per hour in euro) can be found here. Demographic information in France per town, age, sex and living mode can be found here.

Additional info about Population Data can be found here, it allows to add catégorie socioprofessionnelle.

1.1 Aim of the study

This project aims to explore existing patterns among French towns.

In particular, we are interested in:

  • evaluating possible ineqialities: per towns/region, sex, age, etc.;
  • predicting the … using a regression model;
  • reduce the dimensionality of … performing a PCA;
  • explore different algorithms to cluster male/females using …

2 General pre-processing phase

Import data:

# options(encoding = "UTF-8")  # for Mac [PROBABLY, to check]
# options(encoding = "ISO-8859-1")  # for Windows [PROBABLY NOT WORKING]
setwd("./data")
firms       <- read.csv("base_etablissement_par_tranche_effectif.csv", encoding = "UTF-8")
geo         <- read.csv("name_geographic_information.csv", encoding = "UTF-8")
salary      <- read.csv("net_salary_per_town_categories.csv", encoding = "UTF-8")
population  <- read.csv("population.csv", encoding = "UTF-8")

Check variable names:

names(firms) 
 [1] "CODGEO"   "LIBGEO"   "REG"      "DEP"      "E14TST"   "E14TS0ND" "E14TS1"   "E14TS6"   "E14TS10"  "E14TS20" 
[11] "E14TS50"  "E14TS100" "E14TS200" "E14TS500"
names(population)
[1] "NIVGEO"    "CODGEO"    "LIBGEO"    "MOCO"      "AGEQ80_17" "SEXE"      "NB"       
names(salary)
 [1] "CODGEO"    "LIBGEO"    "SNHM14"    "SNHMC14"   "SNHMP14"   "SNHME14"   "SNHMO14"   "SNHMF14"   "SNHMFC14" 
[10] "SNHMFP14"  "SNHMFE14"  "SNHMFO14"  "SNHMH14"   "SNHMHC14"  "SNHMHP14"  "SNHMHE14"  "SNHMHO14"  "SNHM1814" 
[19] "SNHM2614"  "SNHM5014"  "SNHMF1814" "SNHMF2614" "SNHMF5014" "SNHMH1814" "SNHMH2614" "SNHMH5014"
names(geo)
 [1] "EU_circo"               "code_région"            "nom_région"             "chef.lieu_région"      
 [5] "numéro_département"     "nom_département"        "préfecture"             "numéro_circonscription"
 [9] "nom_commune"            "codes_postaux"          "code_insee"             "latitude"              
[13] "longitude"              "éloignement"           

To better understand the data we assign meaningful names and drop some variables which are not needed (at the moment):

names(firms)[2:ncol(firms)] <-
  c("town", 
    "regNum",
    "deptNum",
    "total",
    "null",
    "firmsEmpl_1_5",
    "firmsEmpl_6_9",
    "firmsEmpl_10_19",
    "firmsEmpl_20_49",
    "firmsEmpl_50_99",
    "firmsEmpl_100_199",
    "firmsEmpl_200_499",
    "firmsEmpl_500plus")
names(salary)[2:ncol(salary)] <-
  c("town",
    "sal_general",    
    "sal_executive",
    "sal_midManager",
    "sal_employee",
    "sal_worker",
    "sal_Females",
    "sal_F_executive",
    "sal_F_midManager",
    "sal_F_employee",
    "sal_F_worker",
    "sal_Males",
    "sal_M_executive",
    "sal_M_midManager",
    "sal_M_employee",
    "sal_M_worker",
    "sal_18_25",
    "sal_26_50",
    "sal_51plus",
    "sal_F_18_25",
    "sal_F_26_50",
    "sal_F_51plus",
    "sal_M_18_25",
    "sal_M_26_50",
    "sal_M_51plus")
names(population)[5:7] <-
  c("ageCateg5",
    "sex",
    "peopleCategNum")
# Drop unnecessary columns (code/num and name represents same thing)
geo <- subset(geo, select = -c(EU_circo, code_région, numéro_département, préfecture, numéro_circonscription, éloignement))
# change names 
names(geo)[1:6] = 
  c("region", 
    "region_capital", 
    "department", 
    "town_name", 
    "postal_code", 
    "CODGEO")

Check variable names:

names(firms) 
 [1] "CODGEO"            "town"              "regNum"            "deptNum"           "total"            
 [6] "null"              "firmsEmpl_1_5"     "firmsEmpl_6_9"     "firmsEmpl_10_19"   "firmsEmpl_20_49"  
[11] "firmsEmpl_50_99"   "firmsEmpl_100_199" "firmsEmpl_200_499" "firmsEmpl_500plus"
names(population)
[1] "NIVGEO"         "CODGEO"         "LIBGEO"         "MOCO"           "ageCateg5"      "sex"           
[7] "peopleCategNum"
names(salary)
 [1] "CODGEO"           "town"             "sal_general"      "sal_executive"    "sal_midManager"  
 [6] "sal_employee"     "sal_worker"       "sal_Females"      "sal_F_executive"  "sal_F_midManager"
[11] "sal_F_employee"   "sal_F_worker"     "sal_Males"        "sal_M_executive"  "sal_M_midManager"
[16] "sal_M_employee"   "sal_M_worker"     "sal_18_25"        "sal_26_50"        "sal_51plus"      
[21] "sal_F_18_25"      "sal_F_26_50"      "sal_F_51plus"     "sal_M_18_25"      "sal_M_26_50"     
[26] "sal_M_51plus"    
names(geo)
[1] "region"         "region_capital" "department"     "town_name"      "postal_code"    "CODGEO"        
[7] "latitude"       "longitude"     

[[MOVE LATER]] According to the information provided, the CODGEO variable (in firms, salary and population) and code_insee (in geo) have to be merged. However, for different reasons already identified by a kaggle user on his kernel they do not. To do so:

firms$CODGEO <- sub("A", "0", firms$CODGEO)
firms$CODGEO <- sub("B", "0", firms$CODGEO)
salary$CODGEO <- sub("A", "0", salary$CODGEO)
salary$CODGEO <- sub("B", "0", salary$CODGEO)
population$CODGEO <- sub("A", "0", population$CODGEO)
population$CODGEO <- sub("B", "0", population$CODGEO)

3 Analyze firms data

3.1 Pre-processing

# preliminary checks
dim(firms)
[1] 36681    14
names(firms)
 [1] "CODGEO"            "town"              "regNum"            "deptNum"           "total"            
 [6] "null"              "firmsEmpl_1_5"     "firmsEmpl_6_9"     "firmsEmpl_10_19"   "firmsEmpl_20_49"  
[11] "firmsEmpl_50_99"   "firmsEmpl_100_199" "firmsEmpl_200_499" "firmsEmpl_500plus"
head(firms)
str(firms)
'data.frame':   36681 obs. of  14 variables:
 $ CODGEO           : chr  "01001" "01002" "01004" "01005" ...
 $ town             : Factor w/ 34142 levels "Aast","Abainville",..: 13659 13661 442 444 460 480 484 581 638 802 ...
 $ regNum           : int  82 82 82 82 82 82 82 82 82 82 ...
 $ deptNum          : Factor w/ 101 levels "01","02","03",..: 1 1 1 1 1 1 1 1 1 1 ...
 $ total            : int  25 10 996 99 4 124 48 22 33 14 ...
 $ null             : int  22 9 577 73 4 87 28 17 23 11 ...
 $ firmsEmpl_1_5    : int  1 1 272 20 0 20 15 4 8 2 ...
 $ firmsEmpl_6_9    : int  2 0 63 3 0 10 2 1 1 1 ...
 $ firmsEmpl_10_19  : int  0 0 46 1 0 5 3 0 0 0 ...
 $ firmsEmpl_20_49  : int  0 0 24 2 0 2 0 0 0 0 ...
 $ firmsEmpl_50_99  : int  0 0 9 0 0 0 0 0 0 0 ...
 $ firmsEmpl_100_199: int  0 0 3 0 0 0 0 0 1 0 ...
 $ firmsEmpl_200_499: int  0 0 2 0 0 0 0 0 0 0 ...
 $ firmsEmpl_500plus: int  0 0 0 0 0 0 0 0 0 0 ...
summary(firms)
    CODGEO                      town           regNum         deptNum          total               null         
 Length:36681       Sainte-Colombe:   14   Min.   : 1.00   62     :  895   Min.   :     0.0   Min.   :     0.0  
 Class :character   Saint-Sauveur :   12   1st Qu.:25.00   02     :  816   1st Qu.:     8.0   1st Qu.:     6.0  
 Mode  :character   Beaulieu      :   11   Median :43.00   80     :  782   Median :    19.0   Median :    14.0  
                    Sainte-Marie  :   11   Mean   :49.42   76     :  745   Mean   :   123.5   Mean   :    83.6  
                    Le Pin        :   10   3rd Qu.:73.00   57     :  730   3rd Qu.:    54.0   3rd Qu.:    39.0  
                    Saint-Aubin   :   10   Max.   :94.00   14     :  706   Max.   :427385.0   Max.   :316603.0  
                    (Other)       :36613                   (Other):32007                                        
 firmsEmpl_1_5      firmsEmpl_6_9       firmsEmpl_10_19   firmsEmpl_20_49    firmsEmpl_50_99     firmsEmpl_100_199 
 Min.   :    0.00   Min.   :    0.000   Min.   :    0.0   Min.   :   0.000   Min.   :   0.0000   Min.   :  0.0000  
 1st Qu.:    1.00   1st Qu.:    0.000   1st Qu.:    0.0   1st Qu.:   0.000   1st Qu.:   0.0000   1st Qu.:  0.0000  
 Median :    3.00   Median :    0.000   Median :    0.0   Median :   0.000   Median :   0.0000   Median :  0.0000  
 Mean   :   27.29   Mean   :    5.221   Mean   :    3.8   Mean   :   2.296   Mean   :   0.7383   Mean   :  0.3324  
 3rd Qu.:   11.00   3rd Qu.:    2.000   3rd Qu.:    1.0   3rd Qu.:   1.000   3rd Qu.:   0.0000   3rd Qu.:  0.0000  
 Max.   :76368.00   Max.   :14836.000   Max.   :10829.0   Max.   :5643.000   Max.   :1658.0000   Max.   :812.0000  
                                                                                                                   
 firmsEmpl_200_499  firmsEmpl_500plus  
 Min.   :  0.0000   Min.   :  0.00000  
 1st Qu.:  0.0000   1st Qu.:  0.00000  
 Median :  0.0000   Median :  0.00000  
 Mean   :  0.1728   Mean   :  0.04842  
 3rd Qu.:  0.0000   3rd Qu.:  0.00000  
 Max.   :456.0000   Max.   :180.00000  
                                       
# converting CODGEO format
firms$CODGEO <- as.numeric(firms$CODGEO)
# Check for duplicated data
sum(duplicated.data.frame(firms))
[1] 0
# Categorize firms' size according to EU standard, but slightly different for medium and large firms (medium firms have <200 instead of <250 employees)
# http://ec.europa.eu/eurostat/statistics-explained/index.php/Glossary:Enterprise_size
firms$micro   <- firms$firmsEmpl_1_5 + firms$firmsEmpl_6_9
firms$small   <- firms$firmsEmpl_10_19 + firms$firmsEmpl_20_49
firms$medium  <- firms$firmsEmpl_50_99 + firms$firmsEmpl_100_199
firms$large   <- firms$firmsEmpl_200_499 + firms$firmsEmpl_500plus
# Drop unnecessary (at the moment) columns 
firms <- subset(firms, select = c(CODGEO, town, total, micro, small, medium, large, null))
# check
head(firms)
summary(firms)
     CODGEO                  town           total              micro              small          
 Min.   : 1001   Sainte-Colombe:   14   Min.   :     0.0   Min.   :    0.00   Min.   :    0.000  
 1st Qu.:24564   Saint-Sauveur :   12   1st Qu.:     8.0   1st Qu.:    1.00   1st Qu.:    0.000  
 Median :48171   Beaulieu      :   11   Median :    19.0   Median :    4.00   Median :    0.000  
 Mean   :46263   Sainte-Marie  :   11   Mean   :   123.5   Mean   :   32.51   Mean   :    6.097  
 3rd Qu.:67012   Le Pin        :   10   3rd Qu.:    54.0   3rd Qu.:   13.00   3rd Qu.:    2.000  
 Max.   :97617   Saint-Aubin   :   10   Max.   :427385.0   Max.   :91204.00   Max.   :16472.000  
                 (Other)       :36613                                                            
     medium             large               null         
 Min.   :   0.000   Min.   :  0.0000   Min.   :     0.0  
 1st Qu.:   0.000   1st Qu.:  0.0000   1st Qu.:     6.0  
 Median :   0.000   Median :  0.0000   Median :    14.0  
 Mean   :   1.071   Mean   :  0.2212   Mean   :    83.6  
 3rd Qu.:   0.000   3rd Qu.:  0.0000   3rd Qu.:    39.0  
 Max.   :2470.000   Max.   :636.0000   Max.   :316603.0  
                                                         
str(firms)
'data.frame':   36681 obs. of  8 variables:
 $ CODGEO: num  1001 1002 1004 1005 1006 ...
 $ town  : Factor w/ 34142 levels "Aast","Abainville",..: 13659 13661 442 444 460 480 484 581 638 802 ...
 $ total : int  25 10 996 99 4 124 48 22 33 14 ...
 $ micro : int  3 1 335 23 0 30 17 5 9 3 ...
 $ small : int  0 0 70 3 0 7 3 0 0 0 ...
 $ medium: int  0 0 12 0 0 0 0 0 1 0 ...
 $ large : int  0 0 2 0 0 0 0 0 0 0 ...
 $ null  : int  22 9 577 73 4 87 28 17 23 11 ...
# deptNum has to be factor?

3.2 EDA

# there is an obs with more than 316K null data: we check if it is plausible
# get the highest 20 null values
str_firms <- sort(firms$null, decreasing = T)[1:20]
# get their indexes
str_firms_ind <- match(str_firms, firms$null)
# get the corresponding city
firms$town[str_firms_ind]
 [1] Paris                Marseille            Lyon                 Nice                 Toulouse            
 [6] Bordeaux             Montpellier          Nantes               Strasbourg           Lille               
[11] Aix-en-Provence      Boulogne-Billancourt Fort-de-France       Rennes               Grenoble            
[16] Toulon               Cannes               Saint-Denis          Neuilly-sur-Seine    Nîmes               
34142 Levels: Aast Abainville Abancourt Abaucourt Abaucourt-Hautecourt Abbans-Dessous Abbans-Dessus ... Zuytpeene
# hence, it seems reasonable..
# check the ratio of null for each town
summary(firms$null/firms$total)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
 0.0000  0.6603  0.7500  0.7526  0.8571  1.0000     399 
# a lot of information is missing
# should we remove these data?
hist(firms$null/firms$total)

# evaluate the distribution of all the sizes 
# (log vs. ratio wrt total?)
hist(log(firms$total))

hist(log(firms$null))

hist(log(firms$micro))

hist(firms$micro/firms$total)

hist(log(firms$small))

hist(firms$small/firms$total)

hist(log(firms$medium))

hist(firms$medium/firms$total)

hist(log(firms$large))

hist(firms$large/firms$total)

# keep only logs?

PCA on firms data:

firms_clean <- firms[firms$micro < 20000 & firms$large < 200,]
myPr <- prcomp(firms_clean[, 4:8], scale = TRUE)
#plot(scale(firms_clean$micro), scale(firms_clean$large))
#mean(firms_clean$micro)
#mean(firms_clean$large)
myPr
Standard deviations (1, .., p=5):
[1] 2.14610999 0.54465371 0.26631012 0.13983882 0.08419175

Rotation (n x k) = (5 x 5):
             PC1         PC2         PC3        PC4         PC5
micro  0.4533436  0.40064594 -0.03896827  0.3114187  0.73175293
small  0.4605093  0.09419396  0.42164740  0.5486804 -0.54792511
medium 0.4547148 -0.24046742  0.56534498 -0.6277884  0.14722974
large  0.4207158 -0.75389742 -0.46725510  0.1841806  0.04885769
null   0.4456943  0.45213319 -0.53174492 -0.4170461 -0.37450240
summary(myPr)
Importance of components:
                          PC1     PC2     PC3     PC4     PC5
Standard deviation     2.1461 0.54465 0.26631 0.13984 0.08419
Proportion of Variance 0.9212 0.05933 0.01418 0.00391 0.00142
Cumulative Proportion  0.9212 0.98049 0.99467 0.99858 1.00000
plot(myPr, type = "l")

biplot(myPr, scale = 0)

#extract PC scores...
str(myPr)
List of 5
 $ sdev    : num [1:5] 2.1461 0.5447 0.2663 0.1398 0.0842
 $ rotation: num [1:5, 1:5] 0.453 0.461 0.455 0.421 0.446 ...
  ..- attr(*, "dimnames")=List of 2
  .. ..$ : chr [1:5] "micro" "small" "medium" "large" ...
  .. ..$ : chr [1:5] "PC1" "PC2" "PC3" "PC4" ...
 $ center  : Named num [1:5] 30.026 5.648 1.003 0.204 74.926
  ..- attr(*, "names")= chr [1:5] "micro" "small" "medium" "large" ...
 $ scale   : Named num [1:5] 198.08 36.67 7.05 1.94 510.97
  ..- attr(*, "names")= chr [1:5] "micro" "small" "medium" "large" ...
 $ x       : num [1:36680, 1:5] -0.288 -0.304 3.043 -0.16 -0.31 ...
  ..- attr(*, "dimnames")=List of 2
  .. ..$ : chr [1:36680] "1" "2" "3" "4" ...
  .. ..$ : chr [1:5] "PC1" "PC2" "PC3" "PC4" ...
 - attr(*, "class")= chr "prcomp"
#myPr$x #checking principal component scores
firms2 <- cbind(firms_clean, myPr$x[, 1:2])
head(firms2)
#plot with ggplot...
require(ggplot2)
ggplot(firms2, aes(PC1, PC2)) + 
  stat_ellipse(geom = "polygon", col = "black", alpha = 0.5) + 
  geom_point(shape = 21, col = "black")

# correlations between variables and PCs...
cor(firms_clean[, 4:8], firms2[,9:10])
             PC1         PC2
micro  0.9729251  0.21821330
small  0.9883037  0.05130309
medium 0.9758680 -0.13097147
large  0.9029024 -0.41061303
null   0.9565091  0.24625602

3.3 What we have learned

  • More micro firms than small ones
  • …

3.4 How to use these data

We plan to use these for the following tasks:

  • predict the salaries using such information as proxy for the competition in the job market;
  • predict the total number of firms, using salary data;
  • geo-spatial plot for firms’ size
  • …

4 Analyze geographical data

4.1 Pre-processing

# preliminary checks
dim(geo)
[1] 36840     8
names(geo)
[1] "region"         "region_capital" "department"     "town_name"      "postal_code"    "CODGEO"        
[7] "latitude"       "longitude"     
head(geo)
str(geo)
'data.frame':   36840 obs. of  8 variables:
 $ region        : Factor w/ 28 levels "Alsace","Aquitaine",..: 27 27 27 27 27 27 27 27 27 27 ...
 $ region_capital: Factor w/ 28 levels "Ajaccio","Amiens",..: 14 14 14 14 14 14 14 14 14 14 ...
 $ department    : Factor w/ 102 levels "Ain","Aisne",..: 1 1 1 1 1 1 1 1 1 1 ...
 $ town_name     : Factor w/ 34142 levels "Aast","Abainville",..: 1264 2487 2798 2821 3515 4001 4708 5689 5730 6645 ...
 $ postal_code   : Factor w/ 6106 levels "01000","01090",..: 26 19 29 26 17 1 23 16 17 17 ...
 $ CODGEO        : int  1024 1029 1038 1040 1245 1053 1065 1069 1072 1095 ...
 $ latitude      : num  46.3 46.4 46.3 46.4 46.1 ...
 $ longitude     : Factor w/ 1151 levels "","-","-0,75",..: 874 880 881 864 891 877 872 880 885 892 ...
summary(geo)
           region       region_capital           department             town_name      postal_code   
 Midi-Pyrénées: 3028   Toulouse: 3028   Pas-de-Calais :  898   Paris         :   21   51300  :   46  
 Rhône-Alpes  : 2890   Lyon    : 2890   Aisne         :  816   Sainte-Colombe:   14   51800  :   44  
 Lorraine     : 2336   Metz    : 2336   Somme         :  783   Saint-Sauveur :   12   70000  :   42  
 Aquitaine    : 2300   Bordeaux: 2300   Seine-Maritime:  747   Beaulieu      :   11   88500  :   42  
 Picardie     : 2295   Amiens  : 2295   Moselle       :  732   Saint-Sulpice :   11   80140  :   40  
 Bourgogne    : 2050   Dijon   : 2050   Côte-d'Or     :  709   Sainte-Marie  :   11   02160  :   38  
 (Other)      :21941   (Other) :21941   (Other)       :32155   (Other)       :36760   (Other):36588  
     CODGEO         latitude        longitude    
 Min.   : 1001   Min.   :41.39           : 2841  
 1st Qu.:24577   1st Qu.:45.22   2.433333:  105  
 Median :48191   Median :47.43   2.333333:  100  
 Mean   :46298   Mean   :47.00   1.833333:   99  
 3rd Qu.:67043   3rd Qu.:48.85   2.116667:   98  
 Max.   :97617   Max.   :51.08   2.25    :   94  
                 NA's   :2929    (Other) :33503  
# spot "," instead of "." in longitude
newLong       <- as.character(geo$longitude)    # copy the vector
sum(grep(",", newLong))                         # total commas
[1] 994302
ind_long_err  <- grep(",", newLong)             # indexing them
newLong       <- gsub(",", ".", newLong)        # substituting them with dots
indNA_Long    <- is.na(as.numeric((newLong)))   # spot NA
NAs introduced by coercion
geo$longitude[indNA_Long]                       # verify that they were actually missing
   [1]                                                                                                              
  [56]                                                                                                              
 [111]                                                                                                              
 [166]                                                                                                              
 [221]                                                                                                              
 [276]                                                                                                              
 [331]                                                                                         -             -   -  
 [386]       -   -                                                                                                  
 [441]                   -             -         -         -                     -     - -             -       -    
 [496]                                                                                                              
 [551]                                                                                                              
 [606]                                                                                                              
 [661]                                                                                                              
 [716]                                                                                                              
 [771]                                                                                                              
 [826]                                                                                                              
 [881]                                                                                                              
 [936]                                                                                                              
 [991]                    
 [ reached getOption("max.print") -- omitted 1905 entries ]
1151 Levels:  - -0,75 -0.008333 -0.016667 -0.01679 -0.025 -0.03127 -0.033333 -0.041667 -0.05 -0.058333 ... 9.516667
geo$longitude <- as.numeric(newLong)            # overwrite the longitude variable with the new one
NAs introduced by coercion
# Check for duplicated data (e.g., cities with different postal codes, that we dropped):
  # es. to verify it:  try on the initial dataset
  # sum(geo$nom_commune == "Paris")
  # ind_duplic <- geo$nom_commune == "Paris"
  # geo[ind_duplic,]
sum(duplicated.data.frame(geo)) 
[1] 117
# retaing unique postal cities
geo <- unique(geo, by = "CODGEO")
# check again
head(geo)
summary(geo)
           region       region_capital           department             town_name      postal_code   
 Midi-Pyrénées: 3021   Toulouse: 3021   Pas-de-Calais :  894   Paris         :   19   51300  :   46  
 Rhône-Alpes  : 2882   Lyon    : 2882   Aisne         :  816   Sainte-Colombe:   14   51800  :   44  
 Lorraine     : 2333   Metz    : 2333   Somme         :  782   Saint-Sauveur :   12   70000  :   42  
 Aquitaine    : 2296   Bordeaux: 2296   Seine-Maritime:  745   Beaulieu      :   11   88500  :   42  
 Picardie     : 2291   Amiens  : 2291   Moselle       :  730   Saint-Sulpice :   11   80140  :   40  
 Bourgogne    : 2046   Dijon   : 2046   Côte-d'Or     :  707   Sainte-Marie  :   11   02160  :   38  
 (Other)      :21854   (Other) :21854   (Other)       :32049   (Other)       :36645   (Other):36471  
     CODGEO         latitude       longitude      
 Min.   : 1001   Min.   :41.39   Min.   :-5.1000  
 1st Qu.:24562   1st Qu.:45.22   1st Qu.: 0.6833  
 Median :48180   Median :47.43   Median : 2.6167  
 Mean   :46276   Mean   :47.00   Mean   : 2.7329  
 3rd Qu.:67037   3rd Qu.:48.85   3rd Qu.: 4.8500  
 Max.   :97617   Max.   :51.08   Max.   : 9.5167  
                 NA's   :2925    NA's   :2903     

Assign lat and long values for NAs units:

require(ggmap)
# [ delete? ]
# # compare numbers of NA in latitude and longitude 
# sum(is.na(geo$latitude)) - sum(is.na(geo$longitude))
# # check if they match or not
# sum(!is.na(geo$latitude[indNA_Long]))
# # 64 obs are missing in longitude but not in latitude, hence 88 vice versa
# index of NAs and their total
indNA_coord = is.na(geo$latitude) | is.na(geo$longitude)
sum(indNA_coord)
[1] 2989
# NO MORE NEEDED BECAUSE IS A CSV FILE
    # # initialize variables
    # city_search = 0
    # res = as.data.frame(matrix(c(0, 0, 0), 1, 3))
    # names(res) = c("lon", "lat", "address")
    # 
    # # retrieve lat and long (Google API = 2500 request per day)
    # # my_iter = floor(sum(indNA_coord)/3)
    # for (i in 1:sum(indNA_coord)){
    # 
    #   # city searched
    #   city_search[i] = paste(c(as.character(NA_coord$town_name[i]), as.character(NA_coord$postal_code[i]), as.character(NA_coord$department[i]), "France"), sep=" ", collapse = ", ")
    #   
    #   # solution
    #   res[i,] = geocode(city_search[i], output = "latlona", source = c("google", "dsk"), messaging = FALSE)
    # 
    #   # retrieve still missing data, because of existing problems with API (up to 15 trials)
    #   j = 0
    #   while (any(is.na(res[i,])) & j < 25){
    #     res[i,] = geocode(city_search[i], output = "latlona", source = c("google", "dsk"), messaging = FALSE)
    #     j = j + 1
    #   }
    # }
    # # check the solution
    # sol = cbind(searched = city_search, res)
    # # save it as a csv file to save time
    # write.csv(retrieved_geo_NA[,2:3], "geo_NA_Final.csv", quote = FALSE, row.names=FALSE, fileEncoding = "UTF-8")
    
# read the created csv
retrieved_geo_NA = read.csv("geo_NA_Final.csv", header = T, encoding = "UTF-8")
# get only long and lat and assign to original NA 
geo$latitude[indNA_coord] = retrieved_geo_NA[,2]
geo$longitude[indNA_coord] = retrieved_geo_NA[,1]
# there are 37 still missing units, which are towns located in old colonies far from Europe
indNA_coord = is.na(geo$latitude) | is.na(geo$longitude)
sum(indNA_coord)
[1] 37
# exclude those towns
geo = geo[!indNA_coord,]
summary(geo)
           region       region_capital           department             town_name      postal_code   
 Midi-Pyrénées: 3021   Toulouse: 3021   Pas-de-Calais :  894   Paris         :   19   51300  :   46  
 Rhône-Alpes  : 2882   Lyon    : 2882   Aisne         :  816   Sainte-Colombe:   14   51800  :   44  
 Lorraine     : 2333   Metz    : 2333   Somme         :  782   Saint-Sauveur :   12   70000  :   42  
 Aquitaine    : 2296   Bordeaux: 2296   Seine-Maritime:  745   Beaulieu      :   11   88500  :   42  
 Picardie     : 2291   Amiens  : 2291   Moselle       :  730   Saint-Sulpice :   11   80140  :   40  
 Bourgogne    : 2046   Dijon   : 2046   Côte-d'Or     :  707   Sainte-Marie  :   11   02160  :   38  
 (Other)      :21817   (Other) :21817   (Other)       :32012   (Other)       :36608   (Other):36434  
     CODGEO         latitude        longitude       
 Min.   : 1001   Min.   :-21.38   Min.   :-63.0885  
 1st Qu.:24551   1st Qu.: 45.15   1st Qu.:  0.6667  
 Median :48162   Median : 47.38   Median :  2.6333  
 Mean   :46224   Mean   : 46.87   Mean   :  2.6617  
 3rd Qu.:67006   3rd Qu.: 48.83   3rd Qu.:  4.8667  
 Max.   :97611   Max.   : 51.08   Max.   : 55.8250  
                                                    

4.2 EDA

#install.packages("ggplot2")
#install.packages("ggmap")
require(ggplot2)
require(ggmap)
# plot france (center: 2.213749 46.227638)
# fra_center = as.numeric(geocode("France"))
fra_center = c(2.213749, 46.227638)
FraMap = ggmap(get_googlemap(center=fra_center, scale=2, zoom=5), extent="normal")
Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=46.227638,2.213749&zoom=5&size=640x640&scale=2&maptype=terrain&sensor=false
FraMap

# plot all towns available
geo_pos = as.data.frame(cbind(lon = geo$longitude, lat = geo$latitude))
geo_pos = geo_pos[complete.cases(geo_pos),]
FraMap +
  geom_point(aes(x=lon, y=lat), data=geo_pos, col="orange", alpha=0.1) 

# delete non-European countries
ind_nonEur = geo$latitude < 30 | geo$latitude > 70 |geo$longitude < -20 | geo$longitude > 20
sum(ind_nonEur)
[1] 92
geo = geo[!ind_nonEur,]
# plot all European towns available
geo_pos = as.data.frame(cbind(lon = geo$longitude, lat = geo$latitude))
geo_pos = geo_pos[complete.cases(geo_pos),]
FraMap +
  geom_point(aes(x=lon, y=lat), data=geo_pos, col="orange", alpha=0.1) 

4.3 What we have learned

Solved:

  • Why latitude is missing and not longitude?
  • There are some duplications.

To do:

  • What to do with non-European towns?

4.4 How to use these data

  • Compare European towns vs. old colonies?
  • Useful for all datasets/analyses

5 Analyze salary data

5.1 Pre-processing

# preliminary checks
dim(salary)
[1] 5136   26
names(salary)
 [1] "CODGEO"           "town"             "sal_general"      "sal_executive"    "sal_midManager"  
 [6] "sal_employee"     "sal_worker"       "sal_Females"      "sal_F_executive"  "sal_F_midManager"
[11] "sal_F_employee"   "sal_F_worker"     "sal_Males"        "sal_M_executive"  "sal_M_midManager"
[16] "sal_M_employee"   "sal_M_worker"     "sal_18_25"        "sal_26_50"        "sal_51plus"      
[21] "sal_F_18_25"      "sal_F_26_50"      "sal_F_51plus"     "sal_M_18_25"      "sal_M_26_50"     
[26] "sal_M_51plus"    
head(salary)
str(salary)
'data.frame':   5136 obs. of  26 variables:
 $ CODGEO          : chr  "01004" "01007" "01014" "01024" ...
 $ town            : Factor w/ 5085 levels "Abbeville","Ablis",..: 73 79 133 192 275 298 407 395 400 406 ...
 $ sal_general     : num  13.7 13.5 13.5 12.9 13 13.9 12.4 14 11.5 12.4 ...
 $ sal_executive   : num  24.2 22.1 27.6 21.8 22.8 22.2 24 23.1 21.2 23.4 ...
 $ sal_midManager  : num  15.5 14.7 15.6 14.1 14.1 15.1 13.1 15.3 13.5 14.1 ...
 $ sal_employee    : num  10.3 10.7 11.1 11 10.5 11 10.5 10.9 9.9 10.3 ...
 $ sal_worker      : num  11.2 11.4 11.1 11.3 11.1 11.4 10.4 11.3 10.5 10.5 ...
 $ sal_Females     : num  11.6 11.9 10.9 11.4 11.6 12.5 10.9 12.4 10.3 11 ...
 $ sal_F_executive : num  19.1 19 19.5 19 19.4 20.3 20.7 20.5 20.8 21.5 ...
 $ sal_F_midManager: num  13.2 13.3 11.7 13 13.6 14 11.8 13.9 12.3 13 ...
 $ sal_F_employee  : num  10.1 10.6 10.8 10.3 10.2 10.9 10.4 10.7 9.8 9.9 ...
 $ sal_F_worker    : num  9.6 10 9.5 9.9 9.8 10.5 9.3 10.3 9 9.5 ...
 $ sal_Males       : num  15 14.7 15.3 13.8 13.8 15.2 13.4 15.4 12.3 13.2 ...
 $ sal_M_executive : num  26.4 23.3 30.2 23 24.1 23.1 25.2 24.4 21.3 24 ...
 $ sal_M_midManager: num  16.7 15.8 17.2 14.7 14.4 15.9 13.8 16.3 14.2 14.9 ...
 $ sal_M_employee  : num  11 11.3 12.4 13.2 11.7 12.1 10.8 11.8 10.5 11.6 ...
 $ sal_M_worker    : num  11.6 11.7 11.8 11.6 11.4 11.7 10.8 11.6 11 10.9 ...
 $ sal_18_25       : num  10.5 9.8 9.3 9.6 9.4 9.7 9.3 9.7 9.6 9.7 ...
 $ sal_26_50       : num  13.7 13.8 13.3 12.9 12.8 14.1 12.5 13.9 11.5 12.3 ...
 $ sal_51plus      : num  16.1 14.6 16 14.2 15.2 15.4 13.3 16.7 12.7 13.7 ...
 $ sal_F_18_25     : num  9.7 9.2 8.9 9.3 9 9.5 8.9 9.7 9.2 9.3 ...
 $ sal_F_26_50     : num  11.8 12.2 10.6 11.4 11.8 12.8 11 12.4 10.3 11.2 ...
 $ sal_F_51plus    : num  12.5 12.5 12.5 12.2 12.3 13 11.5 13.8 11.3 11.4 ...
 $ sal_M_18_25     : num  11 10.2 9.6 9.7 9.7 9.9 9.6 9.6 10 9.9 ...
 $ sal_M_26_50     : num  14.9 14.9 15.1 13.8 13.4 15.3 13.3 15 12.3 13 ...
 $ sal_M_51plus    : num  18.6 16.4 18.6 15.9 16.9 17.2 14.9 19.3 13.9 15.4 ...
summary(salary)
    CODGEO                    town       sal_general    sal_executive  sal_midManager   sal_employee  
 Length:5136        Sainte-Marie:   4   Min.   :10.20   Min.   :16.0   Min.   :11.60   Min.   : 8.70  
 Class :character   Saint-Ouen  :   3   1st Qu.:12.10   1st Qu.:21.9   1st Qu.:13.80   1st Qu.:10.00  
 Mode  :character   Allonnes    :   2   Median :13.00   Median :23.2   Median :14.40   Median :10.40  
                    Andilly     :   2   Mean   :13.71   Mean   :23.7   Mean   :14.58   Mean   :10.56  
                    Bassens     :   2   3rd Qu.:14.40   3rd Qu.:24.9   3rd Qu.:15.10   3rd Qu.:10.90  
                    Beaumont    :   2   Max.   :43.30   Max.   :51.5   Max.   :54.60   Max.   :17.50  
                    (Other)     :5121                                                                 
   sal_worker     sal_Females    sal_F_executive sal_F_midManager sal_F_employee   sal_F_worker      sal_Males    
 Min.   : 8.30   Min.   : 9.30   Min.   :12.00   Min.   :10.60    Min.   : 8.70   Min.   : 6.100   Min.   :10.40  
 1st Qu.:10.60   1st Qu.:10.90   1st Qu.:18.80   1st Qu.:12.60    1st Qu.: 9.80   1st Qu.: 9.200   1st Qu.:12.90  
 Median :11.00   Median :11.50   Median :20.00   Median :13.10    Median :10.10   Median : 9.700   Median :14.10  
 Mean   :11.24   Mean   :12.04   Mean   :20.22   Mean   :13.27    Mean   :10.31   Mean   : 9.827   Mean   :14.85  
 3rd Qu.:11.60   3rd Qu.:12.70   3rd Qu.:21.40   3rd Qu.:13.80    3rd Qu.:10.60   3rd Qu.:10.200   3rd Qu.:15.80  
 Max.   :46.30   Max.   :26.70   Max.   :35.50   Max.   :19.00    Max.   :16.10   Max.   :28.100   Max.   :52.40  
                                                                                                                  
 sal_M_executive sal_M_midManager sal_M_employee   sal_M_worker    sal_18_25       sal_26_50      sal_51plus   
 Min.   :13.8    Min.   :11.80    Min.   : 8.00   Min.   : 8.9   Min.   : 7.90   Min.   : 9.7   Min.   :10.50  
 1st Qu.:23.1    1st Qu.:14.50    1st Qu.:10.50   1st Qu.:10.8   1st Qu.: 9.20   1st Qu.:12.0   1st Qu.:13.70  
 Median :24.6    Median :15.20    Median :11.10   Median :11.3   Median : 9.50   Median :12.9   Median :15.00  
 Mean   :25.2    Mean   :15.49    Mean   :11.27   Mean   :11.5   Mean   : 9.55   Mean   :13.5   Mean   :15.88  
 3rd Qu.:26.6    3rd Qu.:16.00    3rd Qu.:11.80   3rd Qu.:11.9   3rd Qu.: 9.70   3rd Qu.:14.3   3rd Qu.:16.90  
 Max.   :58.0    Max.   :93.40    Max.   :23.50   Max.   :53.2   Max.   :60.60   Max.   :38.1   Max.   :56.90  
                                                                                                               
  sal_F_18_25      sal_F_26_50     sal_F_51plus    sal_M_18_25      sal_M_26_50     sal_M_51plus  
 Min.   : 7.500   Min.   : 9.10   Min.   : 9.50   Min.   : 7.800   Min.   : 9.60   Min.   :10.80  
 1st Qu.: 8.900   1st Qu.:10.90   1st Qu.:11.70   1st Qu.: 9.400   1st Qu.:12.70   1st Qu.:14.90  
 Median : 9.100   Median :11.60   Median :12.60   Median : 9.700   Median :13.80   Median :16.60  
 Mean   : 9.162   Mean   :12.06   Mean   :13.17   Mean   : 9.821   Mean   :14.49   Mean   :17.68  
 3rd Qu.: 9.400   3rd Qu.:12.70   3rd Qu.:14.00   3rd Qu.:10.000   3rd Qu.:15.50   3rd Qu.:19.00  
 Max.   :12.000   Max.   :26.60   Max.   :31.00   Max.   :93.300   Max.   :45.40   Max.   :68.60  
                                                                                                  
# Drop unnecessary columns (town name repeats in other table, is it surely possible to merge them?)
names(salary)
 [1] "CODGEO"           "town"             "sal_general"      "sal_executive"    "sal_midManager"  
 [6] "sal_employee"     "sal_worker"       "sal_Females"      "sal_F_executive"  "sal_F_midManager"
[11] "sal_F_employee"   "sal_F_worker"     "sal_Males"        "sal_M_executive"  "sal_M_midManager"
[16] "sal_M_employee"   "sal_M_worker"     "sal_18_25"        "sal_26_50"        "sal_51plus"      
[21] "sal_F_18_25"      "sal_F_26_50"      "sal_F_51plus"     "sal_M_18_25"      "sal_M_26_50"     
[26] "sal_M_51plus"    
# salary <- subset(salary, select = -c(town))
# Convert CODGEO to numeric
salary$CODGEO <- as.numeric(as.character(salary$CODGEO))
# Check for duplicated data
sum(duplicated.data.frame(salary))
[1] 0

5.2 EDA

Univariate analysis comparing various job categories for both genders:

require(ggplot2)
#  number of units
n_sex <- length(salary$sal_Females)
# vector representing males and females
Label <- c(rep("M", n_sex*5), rep("F", n_sex*5))
# vector representing the variable considered
Variable <- c(rep("General", n_sex), 
             rep("Executive", n_sex),
             rep("MidManager", n_sex),
             rep("Employee", n_sex),
             rep("Worker",n_sex),
             rep("General", n_sex), 
             rep("Executive", n_sex),
             rep("MidManager", n_sex),
             rep("Employee", n_sex),
             rep("Worker",n_sex))
# merge these data
sal_sex = cbind.data.frame(Label = Label, 
             value = c(salary$sal_Males, salary$sal_M_executive, salary$sal_M_midManager, salary$sal_M_employee, salary$sal_M_worker,
                       salary$sal_Females, salary$sal_F_executive, salary$sal_F_midManager, salary$sal_F_employee, salary$sal_F_worker),
             Variable = Variable)
# plotting phase
p <-  ggplot(data = sal_sex, aes(x=Label, y=value)) +
      geom_boxplot(aes(fill = Label)) +
      # not color points replacing colour = group instead of colour=Label
      geom_point(aes(y=value, colour=Label), position = position_dodge(width=0.75)) +
      facet_wrap( ~ Variable, scales="free") +
      xlab("x-axis") + ylab("y-axis") + ggtitle("Gender comparison") +
      stat_boxplot(geom = "errorbar", width = 0.5)
      # p <- p + guides(fill=guide_legend(title="Legend"))
p

# excluding outliers
p2 <- ggplot(data = sal_sex, aes(x=Label, y=value)) +
      scale_y_continuous(limits = quantile(sal_sex$value, c(0, 0.9))) +
      geom_boxplot(aes(fill = Label)) +
      # not color points replacing colour = group instead of colour=Label
      geom_point(aes(y=value, colour=Label), position = position_dodge(width=0.75)) +
      facet_wrap( ~ Variable, scales="free") +
      xlab("x-axis") + ylab("y-axis") + ggtitle("Gender comparison excluding the last decile") +
      # p <- p + guides(fill=guide_legend(title="Legend"))
      stat_boxplot(geom = "errorbar", width = 0.5)
p2

Highlight bivariate relations using scatter matrices:

# most general pairs
pairs(salary[c(3:8, 13, 18:20)])

# pairs highlighting genders' differences
pairs(salary[c(9:12, 14:17)])

Fit a regression model to predict the salaries of people in age 26-50 using as regressor 51+ years:

# fit and show OLS estimate
plot(salary$sal_26_50 ~ salary$sal_51plus)
fit_LM_26_50 = glm(salary$sal_26_50 ~ salary$sal_51plus, data = salary)
abline(fit_LM_26_50, lwd=3, col="red")

# diagnostics
summary(fit_LM_26_50)

Call:
glm(formula = salary$sal_26_50 ~ salary$sal_51plus, data = salary)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-5.7024  -0.4395  -0.0228   0.4069   9.2197  

Coefficients:
                  Estimate Std. Error t value Pr(>|t|)    
(Intercept)       3.605893   0.048896   73.75   <2e-16 ***
salary$sal_51plus 0.622877   0.003004  207.35   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

(Dispersion parameter for gaussian family taken to be 0.595825)

    Null deviance: 28676  on 5135  degrees of freedom
Residual deviance:  3059  on 5134  degrees of freedom
AIC: 11920

Number of Fisher Scoring iterations: 2
plot(fit_LM_26_50)

Same as before but adding polynomials which are evauated using 10-folds cross validation:

require(boot)
set.seed(1)
# k-Fold Cross-Validation
cv.err.K = rep(0, 5)
cv.err.K = rbind(cv.err.K, cv.err.K)
for (i in 1:5){
  fit_LM_26_50.K = glm(sal_26_50 ~ poly(sal_51plus, i), data = salary)
  cv.err.K[,i] = cv.glm(salary, fit_LM_26_50.K, K = 10)$delta[1]
}
# plotting results
plot(cv.err.K[1,], type = 'l', col = 'red', xlab = "Polynomials' order", 
     ylab = "10-folds CV", main = "CV and adjusted CV for different polynomials")
lines(cv.err.K[2,], col = 'green')
points(which.min(cv.err.K), cv.err.K[1, which.min(cv.err.K)], col = "red", cex=2, pch=20)
legend('topright', legend = c('CV', 'Adj. CV'), col = c('red', 'green'), pch = 10)

Predicting sal_executive with 5 regressers using Lasso with CV and 10-folds CV: [ PROBLEM FOR COLLINEARITY? use ridge?] [ add polynomials? ]

require(glmnet)
set.seed(1)
# crate an X matrix excluding intercept
# x = cbind(salary$sal_midManager, salary$sal_employee, salary$sal_worker, salary$sal_18_25, salary$sal_51plus)
# y = salary$sal_executive
# probably for ridge
names(salary)
 [1] "CODGEO"           "town"             "sal_general"      "sal_executive"    "sal_midManager"  
 [6] "sal_employee"     "sal_worker"       "sal_Females"      "sal_F_executive"  "sal_F_midManager"
[11] "sal_F_employee"   "sal_F_worker"     "sal_Males"        "sal_M_executive"  "sal_M_midManager"
[16] "sal_M_employee"   "sal_M_worker"     "sal_18_25"        "sal_26_50"        "sal_51plus"      
[21] "sal_F_18_25"      "sal_F_26_50"      "sal_F_51plus"     "sal_M_18_25"      "sal_M_26_50"     
[26] "sal_M_51plus"    
y = salary$sal_26_50
x = as.matrix(cbind(salary[, c(4:8, 13, 18, 20)]))
names(x)
NULL
# grid for lambda values
grid = 10^seq(1, -5, length = 100)
lasso.mod = glmnet(x, y, alpha = 1, lambda = grid)
dim(coef(lasso.mod))
[1]   9 100
# the norms are increasing in value because of the shrinkage
lasso.mod$lambda[1]                  # lambda value
[1] 10
sqrt(sum(coef(lasso.mod)[-1,1]^2))   # L2 norm of its coeff
[1] 0
lasso.mod$lambda[51]
[1] 0.009326033
sqrt(sum(coef(lasso.mod)[-1,51]^2))
[1] 0.8159349
lasso.mod$lambda[90]
[1] 4.037017e-05
sqrt(sum(coef(lasso.mod)[-1,90]^2))
[1] 0.9722552
# predict values for a new lambda, e.g. OLS
OLS = predict(lasso.mod, s = 0, type = "coefficients")[1:nrow(coef(lasso.mod)),]
# split the date leaving the 10% for CV
train = sample(1:nrow(salary), floor(nrow(salary)*0.9))
test = -train
y.test = y[test]
lasso.mod = glmnet(x[train,], y[train], alpha = 1, lambda = grid, thresh = 1e-12)
err.i = rep("NA", length(grid))
for (i in 1:length(grid)){
  lasso.pred = predict(lasso.mod, s = grid[i], newx = x[test,])
  err.i[i] = mean((lasso.pred - y.test)^2)
}
plot(log(grid), err.i, xlab = 'log Lambda', ylab = 'test set MSE', 
     main = 'Test MSE among different Lambdas', ylim = c(0, 10))
bestlam = grid[which.min(err.i)]
points(log(grid)[bestlam], err.i[bestlam], col ="red", cex=2, pch=20)

# high values of lambda are like fitting just the intercept
# using 10 folds CV
set.seed (1)
cv.out = cv.glmnet(x[train ,], y[train], alpha = 1)
plot(cv.out)
bestlam = cv.out$lambda.min
bestlam
[1] 0.001484687
lasso.pred = predict(lasso.mod, s=bestlam, newx=x[test ,])
mean((lasso.pred - y.test)^2)
[1] 0.05322848
# using best subset
require(leaps)
dataBS = as.data.frame(cbind(y, x))
best.sub = regsubsets(y ~ x, data = dataBS, nvmax = nrow(coef(lasso.mod)))
best.sub.summary = summary(best.sub)
names(best.sub.summary)
[1] "which"  "rsq"    "rss"    "adjr2"  "cp"     "bic"    "outmat" "obj"   
# manual plotting
par(mfrow =c(2,2))

# rsq
plot(best.sub.summary$rsq , xlab="Number of Variables", ylab="Rsq", type="l")
ind_Rsq = which.max(best.sub.summary$rsq)
points(ind_Rsq, best.sub.summary$adjr2[ind_Rsq], col ="red", cex=2, pch=20)
# adjRsq
plot(best.sub.summary$adjr2 ,xlab="Number of Variables", ylab="Adjusted RSq", type="l")
ind_adjRsq = which.max(best.sub.summary$adjr2)
points(ind_adjRsq, best.sub.summary$adjr2[ind_adjRsq], col ="red", cex=2, pch=20)
# Cp
plot(best.sub.summary$cp ,xlab="Number of Variables", ylab="Cp", type="l")
ind_Cp = which.min(best.sub.summary$cp)
points(ind_Cp, best.sub.summary$cp[ind_adjRsq], col ="red", cex=2, pch=20)
# bic
plot(best.sub.summary$bic ,xlab="Number of Variables", ylab="bic", type="l")
ind_bic = which.min(best.sub.summary$bic)
points(ind_bic, best.sub.summary$bic[ind_bic], col ="red", cex=2, pch=20)
# built-in plots
?plot.regsubsets
par(mfrow=c(1,1))

plot(best.sub, scale = "r2")

plot(best.sub, scale = "adjr2")

plot(best.sub, scale = "Cp")

plot(best.sub, scale = "bic")

# retrieve the model with min BIC
coefficients(best.sub, which.min(best.sub.summary$bic))
    (Intercept)  xsal_executive xsal_midManager   xsal_employee     xsal_worker    xsal_Females      xsal_Males 
    -0.25048976     -0.01269488      0.04760461      0.04132812      0.04170107      0.56842466      0.73332696 
     xsal_18_25     xsal_51plus 
    -0.07044077     -0.29040179 

Model salaries for people aged 18-25, which seems more difficult to predict:

plot(y = salary$sal_18_25, x = salary$sal_51plus)

# there is a clear outlier
ind_out = which(salary$sal_18_25 == max(salary$sal_18_25))
# check if is so in all dimensions (apparently not)
col_col <- rep("black", nrow(salary))
col_col[ind_out] <- "red"
pairs(salary[, c(3:8, 13, 18:20)], col = col_col)

# evaluate an OLS fit
fit_LM_18_25 = lm(salary$sal_18_25 ~ salary$sal_51plus)
plot(y = salary$sal_18_25, x = salary$sal_51plus)
abline(fit_LM_18_25, lwd=3, col="red")

# # using more predictors
# fit_LM_2 = lm(salary$sal_M_18_25 ~ salary$sal_26_50 + salary$sal_51plus + salary$sal_general + salary$sal_executive + 
#                 salary$sal_midManager + salary$sal_employee + salary$sal_worker)
# 
# summary(fit_LM_2)

PCA for salary:

myPr <- prcomp(salary[, 3:26], scale = TRUE)
myPr
Standard deviations (1, .., p=24):
 [1] 4.06846384 1.42984463 1.05721988 1.01203703 0.88322482 0.75520781 0.66774657 0.62302595 0.59239652 0.54263890
[11] 0.42693995 0.34427033 0.24469681 0.18858814 0.09684001 0.08031760 0.07067011 0.06516261 0.05970587 0.05220597
[21] 0.04552586 0.03013818 0.02484499 0.01231068

Rotation (n x k) = (24 x 24):
                        PC1          PC2          PC3          PC4         PC5         PC6          PC7
sal_general      0.24256129  0.025437648  0.046824213 -0.001482692  0.03076820 -0.06451689  0.055294659
sal_executive    0.20442703  0.090840110  0.370504255  0.230155704  0.21350371  0.06702947  0.104690622
sal_midManager   0.18190516 -0.298541632  0.240776890 -0.140328052 -0.46401004  0.11032025 -0.039816322
sal_employee     0.22422720  0.071074352 -0.247268798 -0.004042860 -0.12740320 -0.26329990  0.014366575
sal_worker       0.20363449  0.025689939 -0.009919171 -0.462312126  0.22036076  0.13598966 -0.117636498
sal_Females      0.23675441  0.077421630 -0.127192423  0.117888907 -0.04566119 -0.00897671 -0.086720278
sal_F_executive  0.17552119  0.100201572  0.134347070  0.360580387  0.05685253  0.19723908 -0.624677246
sal_F_midManager 0.20157041  0.055250814 -0.153976516  0.149031116 -0.30014668  0.08207409 -0.170354268
sal_F_employee   0.21685115  0.070759163 -0.278486962  0.062336516 -0.16747839 -0.07036376 -0.061129067
sal_F_worker     0.17716298  0.057667740 -0.157481946 -0.339303819  0.23212123  0.26673263 -0.223203762
sal_Males        0.23974996  0.009058135  0.108615013 -0.027686738  0.05507451 -0.09080437  0.109670620
sal_M_executive  0.19790649  0.083491339  0.384046886  0.195161560  0.22445421  0.03841633  0.283820931
sal_M_midManager 0.15142477 -0.358343778  0.337293684 -0.200802616 -0.45538809  0.09851366  0.008813757
sal_M_employee   0.18899143  0.053355476 -0.169087541 -0.135169015 -0.04337624 -0.57642720  0.149112333
sal_M_worker     0.19759147  0.020542131  0.014779054 -0.466418793  0.22279564  0.11403974 -0.098836166
sal_18_25        0.10622764 -0.581187020 -0.184539817  0.163255107  0.26393156 -0.01583997  0.002640546
sal_26_50        0.24075123  0.021743461  0.009686478 -0.023148467 -0.01895745 -0.07602419  0.036997646
sal_51plus       0.23674140  0.067331626  0.096987533  0.053894395  0.06972164 -0.03408813  0.097629771
sal_F_18_25      0.15872871 -0.016579718 -0.401651320  0.127716025 -0.07694791  0.60485366  0.529432039
sal_F_26_50      0.23464487  0.070413931 -0.134126680  0.098562960 -0.03913166 -0.02936739 -0.075060725
sal_F_51plus     0.22653207  0.089063623 -0.110555475  0.170095574 -0.07331241  0.01993173 -0.147758727
sal_M_18_25      0.08512445 -0.608375999 -0.120979871  0.149766048  0.29054243 -0.13104749 -0.094878245
sal_M_26_50      0.23769037  0.003296056  0.072159424 -0.061623773 -0.01062363 -0.10331821  0.086592887
sal_M_51plus     0.23172375  0.063471847  0.155656542  0.046627399  0.10720803 -0.05185343  0.173774628
                          PC8          PC9        PC10         PC11         PC12         PC13         PC14
sal_general      -0.109456636  0.027674801 -0.17031410  0.080977040 -0.072080349  0.034011467 -0.087574401
sal_executive     0.145407429 -0.095281235  0.27788374 -0.116184031 -0.091277662 -0.050262712 -0.094749290
sal_midManager    0.040856787 -0.057858294  0.09569372  0.035330196  0.042527735  0.048701263  0.007430303
sal_employee      0.145665182 -0.024675374  0.13257444 -0.327104176  0.043849057  0.101729065 -0.054861091
sal_worker        0.083375097  0.292638010  0.11437721 -0.009936159  0.020419566 -0.067190044  0.073124691
sal_Females      -0.154060756  0.014970640 -0.13019350 -0.005154802 -0.052074804 -0.166052462  0.362943040
sal_F_executive   0.515695940  0.024647934 -0.19920315  0.053743470 -0.029629692  0.150284303 -0.070732588
sal_F_midManager -0.244385975  0.109517724  0.61073768  0.498058587 -0.074307675  0.125149260 -0.106259680
sal_F_employee   -0.068005412  0.061094057  0.15347166 -0.656257011 -0.053673559  0.217741396 -0.203891674
sal_F_worker     -0.157579547 -0.777224505  0.05063640  0.007552204  0.066475744  0.056051310 -0.048241531
sal_Males        -0.104435046  0.018817088 -0.18588939  0.102679421 -0.081239041  0.098859708 -0.255477734
sal_M_executive   0.005970518 -0.115734021  0.37274958 -0.172453447 -0.060289875 -0.220371258  0.201825172
sal_M_midManager  0.102547206 -0.109776798 -0.12475279 -0.157598363  0.077146650 -0.065776558  0.139626301
sal_M_employee    0.540346236 -0.219903667  0.08926476  0.293843520  0.121124001 -0.091199655  0.106345961
sal_M_worker      0.127142364  0.436609055  0.12512635 -0.024954387 -0.004215415 -0.041622685  0.045210768
sal_18_25         0.001207696  0.023279317  0.04512387  0.017294431  0.002633665  0.013002070 -0.016293108
sal_26_50        -0.096505574  0.002769069 -0.20498532  0.073845015 -0.364981228 -0.100043127 -0.098288179
sal_51plus       -0.145711593  0.074302719 -0.14219097  0.075553331  0.399980948  0.234263596  0.011064433
sal_F_18_25       0.330226773  0.021573772 -0.10397906  0.086328029 -0.000224029 -0.008227617 -0.025813810
sal_F_26_50      -0.157937046  0.005991692 -0.15389749 -0.003958265 -0.313921974  0.039401540  0.651433643
sal_F_51plus     -0.179672634  0.072336569 -0.10300216 -0.029610665  0.522101524 -0.660491905 -0.132164963
sal_M_18_25      -0.061587053  0.020934887  0.05728994  0.001050080  0.009639714 -0.004391171 -0.001068603
sal_M_26_50      -0.080401426 -0.008463477 -0.22436907  0.096794152 -0.380087545 -0.163926845 -0.432693213
sal_M_51plus     -0.146634702  0.053171099 -0.15006742  0.092894490  0.355160148  0.510074815  0.066303930
                        PC15         PC16        PC17         PC18          PC19         PC20         PC21
sal_general       0.11373308  0.537564433  0.02743203  0.017489988 -1.159451e-01 -0.004941698 -0.186444087
sal_executive     0.21491568  0.036365780  0.60389368  0.183292088  1.113154e-02  0.209707749  0.237625932
sal_midManager    0.63673058 -0.057984682 -0.28448202 -0.073557676 -1.579773e-01  0.048891526  0.145407708
sal_employee     -0.07035113  0.067292007  0.17510667 -0.754946928 -1.146551e-01  0.037348700  0.097556946
sal_worker        0.06597172  0.060818933  0.11187552  0.010201055  1.064764e-01 -0.667777877  0.259563686
sal_Females      -0.03857624  0.369567679 -0.11208987  0.081286141  2.356819e-02  0.155294284  0.414226899
sal_F_executive  -0.07123694 -0.015017006 -0.15296912 -0.053925877  2.295057e-05 -0.068963399 -0.063321530
sal_F_midManager -0.21693732  0.007488834  0.07002096  0.014105017  4.580890e-02 -0.026142503 -0.058101242
sal_F_employee    0.02547217 -0.043943649 -0.14364839  0.483278945  7.405760e-02 -0.061767647 -0.087559551
sal_F_worker     -0.01598948 -0.019478631 -0.01489252 -0.012757413 -1.294547e-02  0.078849344 -0.040868629
sal_Males        -0.16183855  0.397833221 -0.16104495  0.065851739 -1.876303e-01  0.002719009  0.045217001
sal_M_executive  -0.15473575 -0.031433765 -0.48853916 -0.147678103 -1.172190e-02 -0.169556473 -0.197052690
sal_M_midManager -0.52596569  0.054175552  0.25541377  0.073609475  1.327127e-01 -0.021557735 -0.109754841
sal_M_employee    0.02179142 -0.032252775 -0.05884846  0.283858888  4.685199e-02 -0.008460942 -0.035280604
sal_M_worker     -0.07283739 -0.075671203 -0.09758683  0.006946048 -8.677017e-02  0.610432699 -0.203024554
sal_18_25         0.11267039  0.136111542 -0.07112277 -0.109030858  6.634365e-01  0.107558876 -0.034850157
sal_26_50         0.15596215 -0.240473153  0.12380994 -0.080862049  6.569177e-02 -0.023716404 -0.117863633
sal_51plus        0.20197212 -0.010017352  0.19641903 -0.041589840  3.230681e-02 -0.164704064 -0.540731132
sal_F_18_25      -0.02877802 -0.022084438  0.01487853  0.017465862 -1.179785e-01 -0.021515015  0.001253538
sal_F_26_50       0.07061276 -0.232593633  0.12772260  0.028461809 -2.675494e-02 -0.014024159 -0.150941457
sal_F_51plus      0.01971171 -0.153288200  0.01638906  0.022108181 -5.690660e-04  0.043471311  0.035742420
sal_M_18_25      -0.11587697 -0.154164465  0.07360042  0.100079168 -6.191465e-01 -0.107431569  0.032360802
sal_M_26_50      -0.12254212 -0.331702528 -0.09163891 -0.056093094  1.197454e-01 -0.039867540  0.095574005
sal_M_51plus     -0.16311384 -0.318822068 -0.12772664  0.009630106  9.046612e-02  0.099182609  0.430473611
                         PC22         PC23          PC24
sal_general      -0.171634618 -0.318250742 -0.6211947949
sal_executive     0.094261014  0.051498899 -0.0009957993
sal_midManager    0.074636362  0.046910335 -0.0028543575
sal_employee      0.039721344  0.018583473 -0.0002310930
sal_worker        0.033684544 -0.025598837 -0.0021445691
sal_Females      -0.405780691  0.378673412  0.1835641555
sal_F_executive  -0.024249456 -0.013372003  0.0009117450
sal_F_midManager -0.025079710 -0.016977172  0.0013027139
sal_F_employee   -0.033287099 -0.022210938  0.0014008536
sal_F_worker     -0.004198171  0.001678796  0.0003570630
sal_Males         0.551474149 -0.042726401  0.4589400496
sal_M_executive  -0.077600321 -0.040965609  0.0010058968
sal_M_midManager -0.056017369 -0.034265555  0.0021278696
sal_M_employee   -0.011036698 -0.007791783 -0.0002416376
sal_M_worker     -0.024335767  0.030257103  0.0025064712
sal_18_25         0.119248356 -0.037628737  0.0160530627
sal_26_50        -0.419501092 -0.475425046  0.4460448356
sal_51plus       -0.144167569  0.439057354  0.1690205439
sal_F_18_25      -0.008376178  0.002582028 -0.0034582040
sal_F_26_50       0.456530696 -0.027601748 -0.1483871402
sal_F_51plus      0.177587916 -0.187687161 -0.0459163395
sal_M_18_25      -0.129213299  0.043266374 -0.0149062160
sal_M_26_50       0.028289510  0.477463900 -0.3177600192
sal_M_51plus     -0.062171646 -0.238321858 -0.1290375524
summary(myPr)
Importance of components:
                          PC1     PC2     PC3     PC4    PC5     PC6     PC7     PC8     PC9    PC10    PC11
Standard deviation     4.0685 1.42984 1.05722 1.01204 0.8832 0.75521 0.66775 0.62303 0.59240 0.54264 0.42694
Proportion of Variance 0.6897 0.08519 0.04657 0.04268 0.0325 0.02376 0.01858 0.01617 0.01462 0.01227 0.00759
Cumulative Proportion  0.6897 0.77487 0.82144 0.86412 0.8966 0.92038 0.93896 0.95514 0.96976 0.98203 0.98962
                          PC12    PC13    PC14    PC15    PC16    PC17    PC18    PC19    PC20    PC21    PC22
Standard deviation     0.34427 0.24470 0.18859 0.09684 0.08032 0.07067 0.06516 0.05971 0.05221 0.04553 0.03014
Proportion of Variance 0.00494 0.00249 0.00148 0.00039 0.00027 0.00021 0.00018 0.00015 0.00011 0.00009 0.00004
Cumulative Proportion  0.99456 0.99706 0.99854 0.99893 0.99920 0.99940 0.99958 0.99973 0.99984 0.99993 0.99997
                          PC23    PC24
Standard deviation     0.02484 0.01231
Proportion of Variance 0.00003 0.00001
Cumulative Proportion  0.99999 1.00000
plot(myPr, type = "l")

biplot(myPr, scale = 0, cex = 0.5)

str(myPr)
List of 5
 $ sdev    : num [1:24] 4.068 1.43 1.057 1.012 0.883 ...
 $ rotation: num [1:24, 1:24] 0.243 0.204 0.182 0.224 0.204 ...
  ..- attr(*, "dimnames")=List of 2
  .. ..$ : chr [1:24] "sal_general" "sal_executive" "sal_midManager" "sal_employee" ...
  .. ..$ : chr [1:24] "PC1" "PC2" "PC3" "PC4" ...
 $ center  : Named num [1:24] 13.7 23.7 14.6 10.6 11.2 ...
  ..- attr(*, "names")= chr [1:24] "sal_general" "sal_executive" "sal_midManager" "sal_employee" ...
 $ scale   : Named num [1:24] 2.559 2.836 1.49 0.812 1.222 ...
  ..- attr(*, "names")= chr [1:24] "sal_general" "sal_executive" "sal_midManager" "sal_employee" ...
 $ x       : num [1:5136, 1:24] 0.315 -0.185 0.443 -0.74 -0.922 ...
  ..- attr(*, "dimnames")=List of 2
  .. ..$ : NULL
  .. ..$ : chr [1:24] "PC1" "PC2" "PC3" "PC4" ...
 - attr(*, "class")= chr "prcomp"
#myPr$x #checking principal component scores
salary2 <- cbind(salary, myPr$x[, 1:2])
head(salary2)
#plot with ggplot...
#require(ggplot2)
ggplot(salary2, aes(PC1, PC2)) + 
  stat_ellipse(geom = "polygon", col = "black", alpha = 0.5) + 
  geom_point(shape = 21, col = "black")

# correlations between variables and PCs...
cor(salary[, 3:26], salary2[,27:28])
                       PC1          PC2
sal_general      0.9868518  0.036371885
sal_executive    0.8317040  0.129887244
sal_midManager   0.7400745 -0.426868150
sal_employee     0.9122603  0.101625280
sal_worker       0.8284796  0.036732621
sal_Females      0.9632267  0.110700902
sal_F_executive  0.7141016  0.143272680
sal_F_midManager 0.8200819  0.079000080
sal_F_employee   0.8822511  0.101174610
sal_F_worker     0.7207812  0.082455908
sal_Males        0.9754141  0.012951726
sal_M_executive  0.8051754  0.119379642
sal_M_midManager 0.6160662 -0.512375927
sal_M_employee   0.7689048  0.076290041
sal_M_worker     0.8038938  0.029372055
sal_18_25        0.4321833 -0.831007140
sal_26_50        0.9794877  0.031089772
sal_51plus       0.9631738  0.096273763
sal_F_18_25      0.6457820 -0.023706420
sal_F_26_50      0.9546442  0.100680982
sal_F_51plus     0.9216375  0.127347143
sal_M_18_25      0.3463258 -0.869883156
sal_M_26_50      0.9670347  0.004712847
sal_M_51plus     0.9427597  0.090754879

5.3 What we have learned

  • Check the outlier in sal_18_25: which city is it, etc.
  • evaluate possible multicollinearity
  • Try a regression with all predictors ans lasso

5.4 How to use these data

  • …

6 Analyze population data

6.1 Pre-processing

# preliminary checks
names(population)
[1] "NIVGEO"         "CODGEO"         "LIBGEO"         "MOCO"           "ageCateg5"      "sex"           
[7] "peopleCategNum"
summary(population)
 NIVGEO           CODGEO                     LIBGEO             MOCO         ageCateg5       sex     
 COM:8536584   Length:8536584     Sainte-Colombe:   3094   Min.   :11.00   Min.   : 0   Min.   :1.0  
               Class :character   Saint-Sauveur :   2618   1st Qu.:12.00   1st Qu.:20   1st Qu.:1.0  
               Mode  :character   Sainte-Marie  :   2618   Median :22.00   Median :40   Median :1.5  
                                  Beaulieu      :   2380   Mean   :21.71   Mean   :40   Mean   :1.5  
                                  Le Pin        :   2380   3rd Qu.:31.00   3rd Qu.:60   3rd Qu.:2.0  
                                  Saint-Aubin   :   2380   Max.   :32.00   Max.   :80   Max.   :2.0  
                                  (Other)       :8521114                                             
 peopleCategNum    
 Min.   :    0.00  
 1st Qu.:    0.00  
 Median :    0.00  
 Mean   :    7.45  
 3rd Qu.:    3.00  
 Max.   :48873.00  
                   
# Drop unnecessary columns (NIVGEO is the same for all)
population <- subset(population, select = -c(NIVGEO, LIBGEO))
# converting CODGEO to numeric
population$CODGEO <- as.numeric(population$CODGEO)
# Refactor sex and MOCO
population$MOCO <- factor(population$MOCO, levels = c(11,12,21,22,23,31,32),
                          labels = c("children_living_with_two_parents", "children living with one parent",
                                     "adults_living_in_couple_without_child", "adults_living_in_couple_with_children",
                                     "adults_living_alone_with_children","persons not from family living in the home",
                                     "persons_living_alone"))
population$sex <- factor(population$sex, levels = c(1,2), labels = c("Male", "Female"))
head(population)
# Take out rows with NB (number of people in this category) equal to 0
population <- population[population$peopleCategNum != 0,]
head(population)
summary(population)
     CODGEO                                              MOCO          ageCateg5         sex         
 Min.   : 1001   children_living_with_two_parents          :337182   Min.   : 0.00   Male  :1106500  
 1st Qu.:27181   children living with one parent           :192130   1st Qu.:25.00   Female:1104453  
 Median :50394   adults_living_in_couple_without_child     :529268   Median :45.00                   
 Mean   :48360   adults_living_in_couple_with_children     :451501   Mean   :41.77                   
 3rd Qu.:69154   adults_living_alone_with_children         :160976   3rd Qu.:60.00                   
 Max.   :97424   persons not from family living in the home:178635   Max.   :80.00                   
                 persons_living_alone                      :361261                                   
 peopleCategNum    
 Min.   :    1.00  
 1st Qu.:    4.00  
 Median :    8.00  
 Mean   :   28.75  
 3rd Qu.:   20.00  
 Max.   :48873.00  
                   

6.2 EDA

#Compare age categories:
library(ggplot2)
#  number of units
n_cat <- length(population$CODGEO)
# extract unique categories
uniq_cat <- unique(population$ageCateg5!=5)
# vector representing sex for each category
Label <- c(rep(c('Male', 'Female'), n_cat))
# vector representing the variable considered
Variable <- rep(uniq_cat, n_cat/length(uniq_cat))
Value=population$peopleCategNum
# merge these data
#pop_categ = cbind.data.frame(Label = Label, 
#             value = Value,
#             Variable = Variable)
#p <- ggplot(data = pop_categ, aes(x=Label, y=value)) 
#p <- p + geom_boxplot(aes(fill = Label))
# if you want color for points replace group with colour=Label
#p <- p + geom_point(aes(y=value, colour=Label), position = position_dodge(width=0.75))
#p <- p + facet_wrap( ~ Variable, scales="free")
#p <- p + xlab("x-axis") + ylab("y-axis") + ggtitle("Category comparison")
# p <- p + guides(fill=guide_legend(title="Legend"))
#p
# Restructure population data to produce the demographic profile per town
# install.packages("plyr")
library(plyr)
population_per_town_data <- ddply(population, .(CODGEO), function(population) {
  data.frame(total_population = sum(population$peopleCategNum),
             male = sum(population[population$sex == "Male",]$peopleCategNum),
             female = sum(population[population$sex == "Female",]$peopleCategNum),
             child = sum(population[population$ageCateg5 %in% seq(0, 10, by=5),]$peopleCategNum),
             elderly = sum(population[population$ageCateg5 %in% seq(65, 80, by=5),]$peopleCategNum),
             workforce = sum(population[population$ageCateg5 %in% seq(15, 60, by=5),]$peopleCategNum) 
  )})
population_per_town_data$dependent <- population_per_town_data$child + population_per_town_data$elderly
population_per_town_data$sex_ratio <- ifelse(population_per_town_data$female==0, 0, population_per_town_data$male / population_per_town_data$female)
population_per_town_data$dependency_ratio <- ifelse(population_per_town_data$workforce==0, 0, population_per_town_data$dependent / population_per_town_data$workforce)
population_per_town_data$aged_dependency_ratio <- ifelse(population_per_town_data$workforce==0, 0, population_per_town_data$elderly / population_per_town_data$workforce)
population_per_town_data$child_dependency_ratio <- ifelse(population_per_town_data$workforce==0, 0, population_per_town_data$child / population_per_town_data$workforce)
summary(population_per_town_data)
     CODGEO      total_population       male               female              child             elderly        
 Min.   : 1001   Min.   :      2   Min.   :      0.0   Min.   :      0.0   Min.   :     0.0   Min.   :     0.0  
 1st Qu.:24541   1st Qu.:    184   1st Qu.:     92.0   1st Qu.:     90.0   1st Qu.:    32.0   1st Qu.:    35.0  
 Median :48098   Median :    420   Median :    212.0   Median :    209.0   Median :    80.0   Median :    75.0  
 Mean   :46276   Mean   :   1773   Mean   :    857.6   Mean   :    915.4   Mean   :   333.9   Mean   :   310.1  
 3rd Qu.:67076   3rd Qu.:   1060   3rd Qu.:    528.0   3rd Qu.:    535.0   3rd Qu.:   209.5   3rd Qu.:   190.0  
 Max.   :97424   Max.   :2173279   Max.   :1018597.0   Max.   :1154682.0   Max.   :315127.0   Max.   :338074.0  
   workforce           dependent          sex_ratio       dependency_ratio aged_dependency_ratio
 Min.   :      0.0   Min.   :     0.0   Min.   : 0.0000   Min.   :0.0000   Min.   :0.0000       
 1st Qu.:    112.0   1st Qu.:    70.0   1st Qu.: 0.9075   1st Qu.:0.5135   1st Qu.:0.2091       
 Median :    261.0   Median :   160.0   Median : 0.9971   Median :0.6088   Median :0.2929       
 Mean   :   1128.9   Mean   :   644.1   Mean   : 1.0259   Mean   :0.6538   Mean   :0.3462       
 3rd Qu.:    659.5   3rd Qu.:   401.0   3rd Qu.: 1.1026   3rd Qu.:0.7333   3rd Qu.:0.4166       
 Max.   :1520078.0   Max.   :653201.0   Max.   :10.0000   Max.   :9.0000   Max.   :8.0000       
 child_dependency_ratio
 Min.   :0.0000        
 1st Qu.:0.2453        
 Median :0.3043        
 Mean   :0.3076        
 3rd Qu.:0.3662        
 Max.   :3.0000        
# Scale population to log
hist(population_per_town_data$total_population, ylim=c(0,40000), breaks = seq(0, 2500000, by=250000), xlab="", main = "", labels=T, col ="light blue")
population_per_town_data$total_population_log <- log10(population_per_town_data$total_population)
# Merge geo and pop
geo_pop_by_town <- merge(geo, population_per_town_data)
summary(geo_pop_by_town)
     CODGEO                region       region_capital           department             town_name    
 Min.   : 1001   Midi-Pyrénées: 2980   Toulouse: 2980   Pas-de-Calais :  892   Paris         :   19  
 1st Qu.:24510   Rhône-Alpes  : 2835   Lyon    : 2835   Aisne         :  805   Sainte-Colombe:   14  
 Median :48044   Lorraine     : 2323   Metz    : 2323   Somme         :  782   Saint-Sauveur :   12  
 Mean   :46129   Aquitaine    : 2282   Bordeaux: 2282   Moselle       :  729   Beaulieu      :   10  
 3rd Qu.:67004   Picardie     : 2277   Amiens  : 2277   Seine-Maritime:  718   Beaumont      :   10  
 Max.   :97126   Bourgogne    : 2018   Dijon   : 2018   Côte-d'Or     :  705   Saint-Aubin   :   10  
                 (Other)      :21046   (Other) :21046   (Other)       :31130   (Other)       :35686  
  postal_code       latitude       longitude       total_population       male             female       
 51300  :   46   Min.   :41.39   Min.   :-5.0914   Min.   :      2   Min.   :      0   Min.   :      0  
 51800  :   44   1st Qu.:45.13   1st Qu.: 0.7333   1st Qu.:    184   1st Qu.:     92   1st Qu.:     90  
 70000  :   42   Median :47.37   Median : 2.6833   Median :    420   Median :    210   Median :    208  
 88500  :   42   Mean   :46.96   Mean   : 2.7747   Mean   :   3024   Mean   :   1444   Mean   :   1580  
 80140  :   40   3rd Qu.:48.83   3rd Qu.: 4.8833   3rd Qu.:   1054   3rd Qu.:    524   3rd Qu.:    530  
 10200  :   36   Max.   :51.08   Max.   : 9.5167   Max.   :2173279   Max.   :1018597   Max.   :1154682  
 (Other):35511                                                                                          
     child             elderly         workforce         dependent        sex_ratio       dependency_ratio
 Min.   :     0.0   Min.   :     0   Min.   :      0   Min.   :     0   Min.   : 0.0000   Min.   :0.0000  
 1st Qu.:    32.0   1st Qu.:    35   1st Qu.:    112   1st Qu.:    70   1st Qu.: 0.9074   1st Qu.:0.5132  
 Median :    80.0   Median :    75   Median :    260   Median :   160   Median : 0.9974   Median :0.6090  
 Mean   :   518.2   Mean   :   510   Mean   :   1996   Mean   :  1028   Mean   : 1.0259   Mean   :0.6539  
 3rd Qu.:   208.0   3rd Qu.:   190   3rd Qu.:    655   3rd Qu.:   399   3rd Qu.: 1.1029   3rd Qu.:0.7333  
 Max.   :315127.0   Max.   :338074   Max.   :1520078   Max.   :653201   Max.   :10.0000   Max.   :9.0000  
                                                                                                          
 aged_dependency_ratio child_dependency_ratio total_population_log
 Min.   :0.0000        Min.   :0.0000         Min.   :0.301       
 1st Qu.:0.2094        1st Qu.:0.2450         1st Qu.:2.265       
 Median :0.2932        Median :0.3043         Median :2.623       
 Mean   :0.3465        Mean   :0.3073         Mean   :2.674       
 3rd Qu.:0.4167        3rd Qu.:0.3661         3rd Qu.:3.023       
 Max.   :8.0000        Max.   :3.0000         Max.   :6.337       
                                                                  
# Plot "Distribution of Population for each Town"
#myPalette(low = "white", high = c("green", "red"), mid=NULL, k =50)-Need "GLAD" package
sc <- scale_colour_gradientn(colours =palette(rainbow(8)), limits=c(min(geo_pop_by_town$total_population_log), max(geo_pop_by_town$total_population_log)))

poppulation_distribution <- 
  FraMap + 
  geom_point(aes(x=geo_pop_by_town$longitude, y=geo_pop_by_town$latitude, colour=geo_pop_by_town$total_population_log), 
             data=geo_pop_by_town, alpha=0.8, size=0.6) + 
  sc +
  geom_text(aes(label = town_name, x = longitude, y = latitude), 
            data = subset(geo_pop_by_town, total_population_log %in% head(sort(total_population_log, decreasing=TRUE), 3)),
            check_overlap = TRUE, size=7) +
  labs(color='Total Population in Log') +
  ggtitle("Distribution of Population for each Town") 
poppulation_distribution

# Group population data by department because of small size of some towns and the given geojson file of department
pop_by_department <- ddply(geo_pop_by_town, .(department), function(geo_pop_by_town) {
  data.frame(total_population = sum(geo_pop_by_town$total_population),
             male = sum(geo_pop_by_town$male),
             female = sum(geo_pop_by_town$female),
             child = sum(geo_pop_by_town$child),
             elderly = sum(geo_pop_by_town$elderly),
             dependent = sum(geo_pop_by_town$dependent),
             workforce = sum(geo_pop_by_town$workforce) 
  )})
summary(pop_by_department)
                   department total_population        male              female             child        
 Ain                    : 1   Min.   :    9048   Min.   :    4353   Min.   :    4695   Min.   :   1700  
 Aisne                  : 1   1st Qu.:  268814   1st Qu.:  131022   1st Qu.:  137792   1st Qu.:  50051  
 Allier                 : 1   Median :  512671   Median :  250784   Median :  261218   Median :  91961  
 Alpes-de-Haute-Provence: 1   Mean   : 1114958   Mean   :  532357   Mean   :  582601   Mean   : 191059  
 Alpes-Maritimes        : 1   3rd Qu.:  782721   3rd Qu.:  383158   3rd Qu.:  399563   3rd Qu.: 156655  
 Ardèche                : 1   Max.   :41292301   Max.   :19353343   Max.   :21938958   Max.   :5987413  
 (Other)                :91                                                                             
    elderly          dependent          workforce       
 Min.   :   2076   Min.   :    3776   Min.   :    5272  
 1st Qu.:  55801   1st Qu.:  107317   1st Qu.:  168468  
 Median :  92779   Median :  187742   Median :  324657  
 Mean   : 188003   Mean   :  379062   Mean   :  735896  
 3rd Qu.: 146183   3rd Qu.:  287695   3rd Qu.:  505345  
 Max.   :6423406   Max.   :12410819   Max.   :28881482  
                                                        
pop_by_department$dependency_ratio <- pop_by_department$dependent / pop_by_department$workforce
pop_by_department$aged_dependency_ratio <- pop_by_department$elderly / pop_by_department$workforce
pop_by_department$child_dependency_ratio <- pop_by_department$child / pop_by_department$workforce
# Scale population to log
pop_by_department$total_population_log <- log10(pop_by_department$total_population)
# Merge geo and pop
geo_pop_by_department <- merge(geo, pop_by_department)
summary(geo_pop_by_department)
          department              region       region_capital           town_name      postal_code   
 Pas-de-Calais :  894   Midi-Pyrénées: 3021   Toulouse: 3021   Paris         :   19   51300  :   46  
 Aisne         :  816   Rhône-Alpes  : 2882   Lyon    : 2882   Sainte-Colombe:   14   51800  :   44  
 Somme         :  782   Lorraine     : 2333   Metz    : 2333   Saint-Sauveur :   12   70000  :   42  
 Seine-Maritime:  745   Aquitaine    : 2296   Bordeaux: 2296   Beaulieu      :   11   88500  :   42  
 Moselle       :  730   Picardie     : 2291   Amiens  : 2291   Saint-Sulpice :   11   80140  :   40  
 Côte-d'Or     :  707   Bourgogne    : 2046   Dijon   : 2046   Beaumont      :   10   02160  :   38  
 (Other)       :31920   (Other)      :21725   (Other) :21725   (Other)       :36517   (Other):36342  
     CODGEO         latitude       longitude       total_population        male              female        
 Min.   : 1001   Min.   :41.39   Min.   :-5.0914   Min.   :    9048   Min.   :    4353   Min.   :    4695  
 1st Qu.:24527   1st Qu.:45.17   1st Qu.: 0.6667   1st Qu.:  317665   1st Qu.:  153234   1st Qu.:  162818  
 Median :48111   Median :47.40   Median : 2.6333   Median :  529618   Median :  257449   Median :  272169  
 Mean   :46096   Mean   :46.98   Mean   : 2.7380   Mean   :  694650   Mean   :  336127   Mean   :  358523  
 3rd Qu.:66169   3rd Qu.:48.83   3rd Qu.: 4.8667   3rd Qu.:  776291   3rd Qu.:  378636   3rd Qu.:  397655  
 Max.   :97126   Max.   :51.08   Max.   : 9.5167   Max.   :41292301   Max.   :19353343   Max.   :21938958  
                                                                                                           
     child            elderly          dependent          workforce        dependency_ratio aged_dependency_ratio
 Min.   :   1700   Min.   :   2076   Min.   :    3776   Min.   :    5272   Min.   :0.4297   Min.   :0.1636       
 1st Qu.:  54952   1st Qu.:  66927   1st Qu.:  125937   1st Qu.:  189820   1st Qu.:0.5571   1st Qu.:0.2606       
 Median : 101471   Median :  93119   Median :  193520   Median :  326192   Median :0.6023   Median :0.3071       
 Mean   : 129285   Mean   : 122310   Mean   :  251595   Mean   :  443055   Mean   :0.6014   Mean   :0.3101       
 3rd Qu.: 156655   3rd Qu.: 143873   3rd Qu.:  278973   3rd Qu.:  496202   3rd Qu.:0.6458   3rd Qu.:0.3570       
 Max.   :5987413   Max.   :6423406   Max.   :12410819   Max.   :28881482   Max.   :0.7162   Max.   :0.4536       
                                                                                                                 
 child_dependency_ratio total_population_log
 Min.   :0.2073         Min.   :3.957       
 1st Qu.:0.2708         1st Qu.:5.502       
 Median :0.2920         Median :5.724       
 Mean   :0.2913         Mean   :5.710       
 3rd Qu.:0.3054         3rd Qu.:5.890       
 Max.   :0.3468         Max.   :7.616       
                                            
# Plot "Distribution of Population for each department"
#myPalette(low = "white", high = c("green", "red"), mid=NULL, k =50)-Need "GLAD" package
sc <- scale_colour_gradientn(colours =palette(rainbow(8)), limits=c(min(geo_pop_by_department$total_population_log), max(geo_pop_by_department$total_population_log)))
pop_distribution_department <- 
  FraMap + 
  geom_point(aes(x=geo_pop_by_department$longitude, y=geo_pop_by_department$latitude, colour=geo_pop_by_department$total_population_log), 
             data=geo_pop_by_department, alpha=0.8, size=0.6) + 
  sc +
  geom_text(aes(label = town_name, x = longitude, y = latitude), 
            data = subset(geo_pop_by_department, total_population_log %in% head(sort(total_population_log, decreasing=TRUE), 3)),
            check_overlap = TRUE, size=7) +
  labs(color='Total Population in Log') +
  ggtitle("Distribution of Population for each department") 
pop_distribution_department

7 Produce consistent datasets

# use only integer values
geo$CODGEO = as.integer(geo$CODGEO)  # already integer
population$CODGEO = as.integer(population$CODGEO)
firms$CODGEO = as.integer(firms$CODGEO)
salary$CODGEO = as.integer(salary$CODGEO)
# install.packages("dplyr")
library(dplyr)
dataset = c("population", "salary", "firms", "geo")
# obtain sommon IDs for all datasets
for (i in dataset){
  # get i-th name and make a new variable adding NEW
  nam <- paste(i, "NEW", sep = "")
  # counter to identify the number of iteration in j
  iter = 1
  for (j in dataset){
    if (j != i){
      # datasets different from i-th
      if (iter == 1){
        # 1st iteration: use the original dataset (e.g., geo)
        assign(nam, semi_join(get(i), get(j), by = "CODGEO"))
      } else{
        # successive iteration: use the new dataset (e.g., geoNEW)
        assign(nam, semi_join(get(nam), get(j), by = "CODGEO"))
      }
      iter = iter + 1
    }
  }
}
# check how many observation have been deleted
for (i in dataset){
  del_rows = nrow(get(i)) - nrow(get(paste(i, "NEW", sep = "")))
  del_prop = del_rows / nrow(get(paste(i, "NEW", sep = "")))
  del_obs = paste("For", i, del_rows, "have been deleted.",
                  "They were the", round(del_prop, digits=2), "% of the total.", sep = " ")
  print(del_obs)
}
[1] "For population 1521935 have been deleted. They were the 2.21 % of the total."
[1] "For salary 113 have been deleted. They were the 0.02 % of the total."
[1] "For firms 31658 have been deleted. They were the 6.3 % of the total."
[1] "For geo 31543 have been deleted. They were the 6.24 % of the total."

8 Analysis

8.1 PCA

8.2 Regression

8.3 Clustering

LS0tDQp0aXRsZTogIlRTTCBQcm9qZWN0Ig0KYXV0aG9yOiAiTC4gSW5zb2xpYSwgSi4gS2ltIGFuZCBHLiBZZWdoaWt5YW4iICAgIA0KZGF0ZTogImByIGZvcm1hdChTeXMudGltZSgpLCAnJWQvJW0vJVknKWAiDQpvdXRwdXQ6DQogIGh0bWxfbm90ZWJvb2s6DQogICAgIyBkZl9wcmludDogcGFnZWQNCiAgICB0b2M6IHllcw0KICAgIHRvY19kZXB0aDogJzMnICMgdXAgdG8gdGhyZWUgZGVwdGhzIG9mIGhlYWRpbmdzIChzcGVjaWZpZWQgYnkgIywgIyMgYW5kICMjIykNCiAgICBoaWdobGlnaHQ6IHRhbmdvDQogICAga2VlcF90ZXg6IHllcw0KICAgIG51bWJlcl9zZWN0aW9uczogeWVzDQogICAgIyB0aGVtZTogdW5pdGVkDQogICMgcGRmIGRvY3VtZW50Og0KICBodG1sX2RvY3VtZW50Og0KICAgIHRoZW1lOiB1bml0ZWQNCiAgICBoaWdobGlnaHQ6IHRhbmdvICAgIA0KICAgIHRvYzogeWVzDQogICAgdG9jX2RlcHRoOiAnMycgDQogIHBkZl9kb2N1bWVudDoNCiAgICB0b2M6IHllcw0KICAgIHRvY19kZXB0aDogJzMnDQogICAga2VlcF90ZXg6IHRydWUNCiAgICBsYXRleF9lbmdpbmU6IHBkZmxhdGV4DQplZGl0b3Jfb3B0aW9uczoNCiAgIyB0b2M6IHllcw0KICAjIHRvY19kZXB0aDogMw0KICBjaHVua19vdXRwdXRfdHlwZTogaW5saW5lDQotLS0NCg0KDQojIEdlbmVyYWwgaW5mb3JtYXRpb24gIw0KDQpUaGlzIGlzIGFuIFtSIE1hcmtkb3duXShodHRwOi8vcm1hcmtkb3duLnJzdHVkaW8uY29tKSBOb3RlYm9vay4gV2hlbiB5b3UgZXhlY3V0ZSBjb2RlIHdpdGhpbiB0aGUgbm90ZWJvb2ssIHRoZSByZXN1bHRzIGFwcGVhciBiZW5lYXRoIHRoZSBjb2RlLiANCg0KV2UgYW5hbHl6ZSBhIGRhdGFzZXQgcHVibGlzaGVkIG9uIFtLYWdnbGVdKGh0dHBzOi8vd3d3LmthZ2dsZS5jb20vZXRpZW5uZWxxL2ZyZW5jaC1lbXBsb3ltZW50LWJ5LXRvd24pLg0KSXQgcmVmZXJzIHRvIGZyZW5jaCBlbXBsb3ltZW50LCBzYWxhcmllcywgcG9wdWxhdGlvbiBwZXIgdG93bi4gDQpUaGUgYWltIGlzIHRvIGV2YWx1YXRlIGVxdWFsaXR5L2luZXF1YWxpdGllcyBpbiBGcmFuY2UsIGFuZCBnZW9ncmFwaGljYWwgZGlzdHJpYnV0aW9uIG9mIGJ1c2luZXNzIGFjY29yZGluZyB0byB0aGVpciBzaXplLg0KDQpTdWNoIGRhdGEgYXJlIGNvbGxlY3RlZCBieSB0aGUgSU5TRUUuDQpJbmZvcm1hdGlvbiByZWdhcmRpbmcgdGhlIG51bWJlciBvZiBmaXJtcyBpbiBldmVyeSBmcmVuY2ggdG93biwgY2F0ZWdvcml6ZWQgYnkgc2l6ZQ0KY2FuIGJlIGZvdW5kIFtoZXJlXShodHRwczovL3d3dy5pbnNlZS5mci9mci9tZXRhZG9ubmVlcy9kZWZpbml0aW9uL2MxMTM1KS4gDQpJbmZvcm1hdGlvbiBhYm91dCBzYWxhcmllcyBhcm91bmQgZnJlbmNoIHRvd24gcGVyIGpvYiBjYXRlZ29yaWVzLCBhZ2UgYW5kIHNleCAoZXhwcmVzc2VkIGluIGF2ZXJhZ2UgbmV0IGFtb3VudCBwZXIgaG91ciBpbiBldXJvKSBjYW4gYmUgZm91bmQgW2hlcmVdKGh0dHBzOi8vd3d3Lmluc2VlLmZyL2ZyL3N0YXRpc3RpcXVlcy8yNTIyNTE1KS4NCkRlbW9ncmFwaGljIGluZm9ybWF0aW9uIGluIEZyYW5jZSBwZXIgdG93biwgYWdlLCBzZXggYW5kIGxpdmluZyBtb2RlDQpjYW4gYmUgZm91bmQgW2hlcmVdKGh0dHBzOi8vd3d3Lmluc2VlLmZyL2ZyL3N0YXRpc3RpcXVlcy8yODYzNjA3KS4NCg0KQWRkaXRpb25hbCBpbmZvIGFib3V0IFBvcHVsYXRpb24gRGF0YSBjYW4gYmUgZm91bmQgW2hlcmVdKGh0dHBzOi8vd3d3Lmluc2VlLmZyL2ZyL3N0YXRpc3RpcXVlcy8yODYzNjA3I2RpY3Rpb25uYWlyZSksIA0KaXQgYWxsb3dzIHRvIGFkZCBjYXTpZ29yaWUgc29jaW9wcm9mZXNzaW9ubmVsbGUuDQoNCg0KDQoNCiMjIEFpbSBvZiB0aGUgc3R1ZHkgIyMNClRoaXMgcHJvamVjdCBhaW1zIHRvIGV4cGxvcmUgZXhpc3RpbmcgcGF0dGVybnMgYW1vbmcgRnJlbmNoIHRvd25zLg0KDQpJbiBwYXJ0aWN1bGFyLCB3ZSBhcmUgaW50ZXJlc3RlZCBpbjoNCg0KKiBldmFsdWF0aW5nIHBvc3NpYmxlIGluZXFpYWxpdGllczogcGVyIHRvd25zL3JlZ2lvbiwgc2V4LCBhZ2UsIGV0Yy47DQoqIHByZWRpY3RpbmcgdGhlIC4uLiB1c2luZyBhIHJlZ3Jlc3Npb24gbW9kZWw7DQoqIHJlZHVjZSB0aGUgZGltZW5zaW9uYWxpdHkgb2YgLi4uIHBlcmZvcm1pbmcgYSBQQ0E7DQoqIGV4cGxvcmUgZGlmZmVyZW50IGFsZ29yaXRobXMgdG8gY2x1c3RlciBtYWxlL2ZlbWFsZXMgdXNpbmcgLi4uDQoNCg0KDQojIEdlbmVyYWwgcHJlLXByb2Nlc3NpbmcgcGhhc2UgIw0KDQpJbXBvcnQgZGF0YToNCmBgYHtyIHdhcm5pbmc9RkFMU0V9DQojIG9wdGlvbnMoZW5jb2RpbmcgPSAiVVRGLTgiKSAgIyBmb3IgTWFjIFtQUk9CQUJMWSwgdG8gY2hlY2tdDQojIG9wdGlvbnMoZW5jb2RpbmcgPSAiSVNPLTg4NTktMSIpICAjIGZvciBXaW5kb3dzIFtQUk9CQUJMWSBOT1QgV09SS0lOR10NCnNldHdkKCIuL2RhdGEiKQ0KZmlybXMgICAgICAgPC0gcmVhZC5jc3YoImJhc2VfZXRhYmxpc3NlbWVudF9wYXJfdHJhbmNoZV9lZmZlY3RpZi5jc3YiLCBlbmNvZGluZyA9ICJVVEYtOCIpDQpnZW8gICAgICAgICA8LSByZWFkLmNzdigibmFtZV9nZW9ncmFwaGljX2luZm9ybWF0aW9uLmNzdiIsIGVuY29kaW5nID0gIlVURi04IikNCnNhbGFyeSAgICAgIDwtIHJlYWQuY3N2KCJuZXRfc2FsYXJ5X3Blcl90b3duX2NhdGVnb3JpZXMuY3N2IiwgZW5jb2RpbmcgPSAiVVRGLTgiKQ0KcG9wdWxhdGlvbiAgPC0gcmVhZC5jc3YoInBvcHVsYXRpb24uY3N2IiwgZW5jb2RpbmcgPSAiVVRGLTgiKQ0KYGBgDQoNCkNoZWNrIHZhcmlhYmxlIG5hbWVzOg0KYGBge3J9DQpuYW1lcyhmaXJtcykgDQpuYW1lcyhwb3B1bGF0aW9uKQ0KbmFtZXMoc2FsYXJ5KQ0KbmFtZXMoZ2VvKQ0KYGBgDQoNClRvIGJldHRlciB1bmRlcnN0YW5kIHRoZSBkYXRhIHdlIGFzc2lnbiBtZWFuaW5nZnVsIG5hbWVzIGFuZCBkcm9wIHNvbWUgdmFyaWFibGVzIHdoaWNoIGFyZSBub3QgbmVlZGVkIChhdCB0aGUgbW9tZW50KToNCmBgYHtyfSANCm5hbWVzKGZpcm1zKVsyOm5jb2woZmlybXMpXSA8LQ0KICBjKCJ0b3duIiwgDQogICAgInJlZ051bSIsDQogICAgImRlcHROdW0iLA0KICAgICJ0b3RhbCIsDQogICAgIm51bGwiLA0KICAgICJmaXJtc0VtcGxfMV81IiwNCiAgICAiZmlybXNFbXBsXzZfOSIsDQogICAgImZpcm1zRW1wbF8xMF8xOSIsDQogICAgImZpcm1zRW1wbF8yMF80OSIsDQogICAgImZpcm1zRW1wbF81MF85OSIsDQogICAgImZpcm1zRW1wbF8xMDBfMTk5IiwNCiAgICAiZmlybXNFbXBsXzIwMF80OTkiLA0KICAgICJmaXJtc0VtcGxfNTAwcGx1cyIpDQoNCm5hbWVzKHNhbGFyeSlbMjpuY29sKHNhbGFyeSldIDwtDQogIGMoInRvd24iLA0KICAgICJzYWxfZ2VuZXJhbCIsICAgIA0KICAgICJzYWxfZXhlY3V0aXZlIiwNCiAgICAic2FsX21pZE1hbmFnZXIiLA0KICAgICJzYWxfZW1wbG95ZWUiLA0KICAgICJzYWxfd29ya2VyIiwNCiAgICAic2FsX0ZlbWFsZXMiLA0KICAgICJzYWxfRl9leGVjdXRpdmUiLA0KICAgICJzYWxfRl9taWRNYW5hZ2VyIiwNCiAgICAic2FsX0ZfZW1wbG95ZWUiLA0KICAgICJzYWxfRl93b3JrZXIiLA0KICAgICJzYWxfTWFsZXMiLA0KICAgICJzYWxfTV9leGVjdXRpdmUiLA0KICAgICJzYWxfTV9taWRNYW5hZ2VyIiwNCiAgICAic2FsX01fZW1wbG95ZWUiLA0KICAgICJzYWxfTV93b3JrZXIiLA0KICAgICJzYWxfMThfMjUiLA0KICAgICJzYWxfMjZfNTAiLA0KICAgICJzYWxfNTFwbHVzIiwNCiAgICAic2FsX0ZfMThfMjUiLA0KICAgICJzYWxfRl8yNl81MCIsDQogICAgInNhbF9GXzUxcGx1cyIsDQogICAgInNhbF9NXzE4XzI1IiwNCiAgICAic2FsX01fMjZfNTAiLA0KICAgICJzYWxfTV81MXBsdXMiKQ0KDQpuYW1lcyhwb3B1bGF0aW9uKVs1OjddIDwtDQogIGMoImFnZUNhdGVnNSIsDQogICAgInNleCIsDQogICAgInBlb3BsZUNhdGVnTnVtIikNCg0KIyBEcm9wIHVubmVjZXNzYXJ5IGNvbHVtbnMgKGNvZGUvbnVtIGFuZCBuYW1lIHJlcHJlc2VudHMgc2FtZSB0aGluZykNCmdlbyA8LSBzdWJzZXQoZ2VvLCBzZWxlY3QgPSAtYyhFVV9jaXJjbywgY29kZV9y6Wdpb24sIG51belyb19k6XBhcnRlbWVudCwgcHLpZmVjdHVyZSwgbnVt6XJvX2NpcmNvbnNjcmlwdGlvbiwg6WxvaWduZW1lbnQpKQ0KIyBjaGFuZ2UgbmFtZXMgDQpuYW1lcyhnZW8pWzE6Nl0gPSANCiAgYygicmVnaW9uIiwgDQogICAgInJlZ2lvbl9jYXBpdGFsIiwgDQogICAgImRlcGFydG1lbnQiLCANCiAgICAidG93bl9uYW1lIiwgDQogICAgInBvc3RhbF9jb2RlIiwgDQogICAgIkNPREdFTyIpDQpgYGANCg0KQ2hlY2sgdmFyaWFibGUgbmFtZXM6DQpgYGB7cn0NCm5hbWVzKGZpcm1zKSANCm5hbWVzKHBvcHVsYXRpb24pDQpuYW1lcyhzYWxhcnkpDQpuYW1lcyhnZW8pDQpgYGANCg0KDQpbW01PVkUgTEFURVJdXQ0KQWNjb3JkaW5nIHRvIHRoZSBpbmZvcm1hdGlvbiBwcm92aWRlZCwgdGhlIENPREdFTyB2YXJpYWJsZSAoaW4gZmlybXMsIHNhbGFyeSBhbmQgcG9wdWxhdGlvbikgYW5kIGNvZGVfaW5zZWUgKGluIGdlbykgaGF2ZSB0byBiZSBtZXJnZWQuDQpIb3dldmVyLCBmb3IgZGlmZmVyZW50IHJlYXNvbnMgYWxyZWFkeSBpZGVudGlmaWVkIGJ5IGEga2FnZ2xlIHVzZXIgb24gW2hpcyBrZXJuZWxdKGh0dHBzOi8vd3d3LmthZ2dsZS5jb20vYW5xaXR1L2luc2lnaHRzLW9uLWJ1c2luZXNzLWRlbW9ncmFwaGljLWluZXF1YWxpdHkpIHRoZXkgZG8gbm90Lg0KVG8gZG8gc286DQpgYGB7cn0NCmZpcm1zJENPREdFTyA8LSBzdWIoIkEiLCAiMCIsIGZpcm1zJENPREdFTykNCmZpcm1zJENPREdFTyA8LSBzdWIoIkIiLCAiMCIsIGZpcm1zJENPREdFTykNCnNhbGFyeSRDT0RHRU8gPC0gc3ViKCJBIiwgIjAiLCBzYWxhcnkkQ09ER0VPKQ0Kc2FsYXJ5JENPREdFTyA8LSBzdWIoIkIiLCAiMCIsIHNhbGFyeSRDT0RHRU8pDQpwb3B1bGF0aW9uJENPREdFTyA8LSBzdWIoIkEiLCAiMCIsIHBvcHVsYXRpb24kQ09ER0VPKQ0KcG9wdWxhdGlvbiRDT0RHRU8gPC0gc3ViKCJCIiwgIjAiLCBwb3B1bGF0aW9uJENPREdFTykNCmBgYA0KDQoNCg0KIyBBbmFseXplIGZpcm1zIGRhdGEgIw0KDQojIyBQcmUtcHJvY2Vzc2luZyAjIw0KYGBge3J9DQoNCiMgcHJlbGltaW5hcnkgY2hlY2tzDQpkaW0oZmlybXMpDQpuYW1lcyhmaXJtcykNCmhlYWQoZmlybXMpDQpzdHIoZmlybXMpDQpzdW1tYXJ5KGZpcm1zKQ0KDQojIGNvbnZlcnRpbmcgQ09ER0VPIGZvcm1hdA0KZmlybXMkQ09ER0VPIDwtIGFzLm51bWVyaWMoZmlybXMkQ09ER0VPKQ0KDQojIENoZWNrIGZvciBkdXBsaWNhdGVkIGRhdGENCnN1bShkdXBsaWNhdGVkLmRhdGEuZnJhbWUoZmlybXMpKQ0KDQojIENhdGVnb3JpemUgZmlybXMnIHNpemUgYWNjb3JkaW5nIHRvIEVVIHN0YW5kYXJkLCBidXQgc2xpZ2h0bHkgZGlmZmVyZW50IGZvciBtZWRpdW0gYW5kIGxhcmdlIGZpcm1zIChtZWRpdW0gZmlybXMgaGF2ZSA8MjAwIGluc3RlYWQgb2YgPDI1MCBlbXBsb3llZXMpDQojIGh0dHA6Ly9lYy5ldXJvcGEuZXUvZXVyb3N0YXQvc3RhdGlzdGljcy1leHBsYWluZWQvaW5kZXgucGhwL0dsb3NzYXJ5OkVudGVycHJpc2Vfc2l6ZQ0KZmlybXMkbWljcm8gICA8LSBmaXJtcyRmaXJtc0VtcGxfMV81ICsgZmlybXMkZmlybXNFbXBsXzZfOQ0KZmlybXMkc21hbGwgICA8LSBmaXJtcyRmaXJtc0VtcGxfMTBfMTkgKyBmaXJtcyRmaXJtc0VtcGxfMjBfNDkNCmZpcm1zJG1lZGl1bSAgPC0gZmlybXMkZmlybXNFbXBsXzUwXzk5ICsgZmlybXMkZmlybXNFbXBsXzEwMF8xOTkNCmZpcm1zJGxhcmdlICAgPC0gZmlybXMkZmlybXNFbXBsXzIwMF80OTkgKyBmaXJtcyRmaXJtc0VtcGxfNTAwcGx1cw0KDQojIERyb3AgdW5uZWNlc3NhcnkgKGF0IHRoZSBtb21lbnQpIGNvbHVtbnMgDQpmaXJtcyA8LSBzdWJzZXQoZmlybXMsIHNlbGVjdCA9IGMoQ09ER0VPLCB0b3duLCB0b3RhbCwgbWljcm8sIHNtYWxsLCBtZWRpdW0sIGxhcmdlLCBudWxsKSkNCg0KIyBjaGVjaw0KaGVhZChmaXJtcykNCnN1bW1hcnkoZmlybXMpDQpzdHIoZmlybXMpDQojIGRlcHROdW0gaGFzIHRvIGJlIGZhY3Rvcj8NCmBgYA0KDQojIyBFREEgIyMNCiAgDQpgYGB7cn0NCiMgdGhlcmUgaXMgYW4gb2JzIHdpdGggbW9yZSB0aGFuIDMxNksgbnVsbCBkYXRhOiB3ZSBjaGVjayBpZiBpdCBpcyBwbGF1c2libGUNCiMgZ2V0IHRoZSBoaWdoZXN0IDIwIG51bGwgdmFsdWVzDQpzdHJfZmlybXMgPC0gc29ydChmaXJtcyRudWxsLCBkZWNyZWFzaW5nID0gVClbMToyMF0NCiMgZ2V0IHRoZWlyIGluZGV4ZXMNCnN0cl9maXJtc19pbmQgPC0gbWF0Y2goc3RyX2Zpcm1zLCBmaXJtcyRudWxsKQ0KIyBnZXQgdGhlIGNvcnJlc3BvbmRpbmcgY2l0eQ0KZmlybXMkdG93bltzdHJfZmlybXNfaW5kXQ0KIyBoZW5jZSwgaXQgc2VlbXMgcmVhc29uYWJsZS4uDQoNCiMgY2hlY2sgdGhlIHJhdGlvIG9mIG51bGwgZm9yIGVhY2ggdG93bg0Kc3VtbWFyeShmaXJtcyRudWxsL2Zpcm1zJHRvdGFsKQ0KIyBhIGxvdCBvZiBpbmZvcm1hdGlvbiBpcyBtaXNzaW5nDQojIHNob3VsZCB3ZSByZW1vdmUgdGhlc2UgZGF0YT8NCmhpc3QoZmlybXMkbnVsbC9maXJtcyR0b3RhbCkNCg0KIyBldmFsdWF0ZSB0aGUgZGlzdHJpYnV0aW9uIG9mIGFsbCB0aGUgc2l6ZXMgDQojIChsb2cgdnMuIHJhdGlvIHdydCB0b3RhbD8pDQpoaXN0KGxvZyhmaXJtcyR0b3RhbCkpDQpoaXN0KGxvZyhmaXJtcyRudWxsKSkNCmhpc3QobG9nKGZpcm1zJG1pY3JvKSkNCmhpc3QoZmlybXMkbWljcm8vZmlybXMkdG90YWwpDQpoaXN0KGxvZyhmaXJtcyRzbWFsbCkpDQpoaXN0KGZpcm1zJHNtYWxsL2Zpcm1zJHRvdGFsKQ0KaGlzdChsb2coZmlybXMkbWVkaXVtKSkNCmhpc3QoZmlybXMkbWVkaXVtL2Zpcm1zJHRvdGFsKQ0KaGlzdChsb2coZmlybXMkbGFyZ2UpKQ0KaGlzdChmaXJtcyRsYXJnZS9maXJtcyR0b3RhbCkNCg0KIyBrZWVwIG9ubHkgbG9ncz8NCg0KYGBgDQoNClBDQSBvbiBmaXJtcyBkYXRhOg0KYGBge3J9DQpmaXJtc19jbGVhbiA8LSBmaXJtc1tmaXJtcyRtaWNybyA8IDIwMDAwICYgZmlybXMkbGFyZ2UgPCAyMDAsXQ0KbXlQciA8LSBwcmNvbXAoZmlybXNfY2xlYW5bLCA0OjhdLCBzY2FsZSA9IFRSVUUpDQojcGxvdChzY2FsZShmaXJtc19jbGVhbiRtaWNybyksIHNjYWxlKGZpcm1zX2NsZWFuJGxhcmdlKSkNCiNtZWFuKGZpcm1zX2NsZWFuJG1pY3JvKQ0KI21lYW4oZmlybXNfY2xlYW4kbGFyZ2UpDQpteVByDQpzdW1tYXJ5KG15UHIpDQpwbG90KG15UHIsIHR5cGUgPSAibCIpDQpiaXBsb3QobXlQciwgc2NhbGUgPSAwKQ0KI2V4dHJhY3QgUEMgc2NvcmVzLi4uDQpzdHIobXlQcikNCiNteVByJHggI2NoZWNraW5nIHByaW5jaXBhbCBjb21wb25lbnQgc2NvcmVzDQpmaXJtczIgPC0gY2JpbmQoZmlybXNfY2xlYW4sIG15UHIkeFssIDE6Ml0pDQpoZWFkKGZpcm1zMikNCiNwbG90IHdpdGggZ2dwbG90Li4uDQpyZXF1aXJlKGdncGxvdDIpDQpnZ3Bsb3QoZmlybXMyLCBhZXMoUEMxLCBQQzIpKSArIA0KICBzdGF0X2VsbGlwc2UoZ2VvbSA9ICJwb2x5Z29uIiwgY29sID0gImJsYWNrIiwgYWxwaGEgPSAwLjUpICsgDQogIGdlb21fcG9pbnQoc2hhcGUgPSAyMSwgY29sID0gImJsYWNrIikNCiMgY29ycmVsYXRpb25zIGJldHdlZW4gdmFyaWFibGVzIGFuZCBQQ3MuLi4NCmNvcihmaXJtc19jbGVhblssIDQ6OF0sIGZpcm1zMlssOToxMF0pDQpgYGANCg0KIyMgV2hhdCB3ZSBoYXZlIGxlYXJuZWQgIyMNCg0KKiBNb3JlIG1pY3JvIGZpcm1zIHRoYW4gc21hbGwgb25lcw0KKiAuLi4gDQoNCiMjIEhvdyB0byB1c2UgdGhlc2UgZGF0YSAjIw0KDQpXZSBwbGFuIHRvIHVzZSB0aGVzZSBmb3IgdGhlIGZvbGxvd2luZyB0YXNrczoNCg0KKiBwcmVkaWN0IHRoZSBzYWxhcmllcyB1c2luZyBzdWNoIGluZm9ybWF0aW9uIGFzIHByb3h5IGZvciB0aGUgY29tcGV0aXRpb24gaW4gdGhlIGpvYiBtYXJrZXQ7DQoqIHByZWRpY3QgdGhlIHRvdGFsIG51bWJlciBvZiBmaXJtcywgdXNpbmcgc2FsYXJ5IGRhdGE7DQoqIGdlby1zcGF0aWFsIHBsb3QgZm9yIGZpcm1zJyBzaXplIA0KKiAuLi4gDQoNCg0KIyBBbmFseXplIGdlb2dyYXBoaWNhbCBkYXRhICMNCg0KIyMgUHJlLXByb2Nlc3NpbmcgIyMNCmBgYHtyfQ0KDQojIHByZWxpbWluYXJ5IGNoZWNrcw0KZGltKGdlbykNCm5hbWVzKGdlbykNCmhlYWQoZ2VvKQ0Kc3RyKGdlbykNCnN1bW1hcnkoZ2VvKQ0KDQojIHNwb3QgIiwiIGluc3RlYWQgb2YgIi4iIGluIGxvbmdpdHVkZQ0KbmV3TG9uZyAgICAgICA8LSBhcy5jaGFyYWN0ZXIoZ2VvJGxvbmdpdHVkZSkgICAgIyBjb3B5IHRoZSB2ZWN0b3INCnN1bShncmVwKCIsIiwgbmV3TG9uZykpICAgICAgICAgICAgICAgICAgICAgICAgICMgdG90YWwgY29tbWFzDQppbmRfbG9uZ19lcnIgIDwtIGdyZXAoIiwiLCBuZXdMb25nKSAgICAgICAgICAgICAjIGluZGV4aW5nIHRoZW0NCm5ld0xvbmcgICAgICAgPC0gZ3N1YigiLCIsICIuIiwgbmV3TG9uZykgICAgICAgICMgc3Vic3RpdHV0aW5nIHRoZW0gd2l0aCBkb3RzDQppbmROQV9Mb25nICAgIDwtIGlzLm5hKGFzLm51bWVyaWMoKG5ld0xvbmcpKSkgICAjIHNwb3QgTkENCmdlbyRsb25naXR1ZGVbaW5kTkFfTG9uZ10gICAgICAgICAgICAgICAgICAgICAgICMgdmVyaWZ5IHRoYXQgdGhleSB3ZXJlIGFjdHVhbGx5IG1pc3NpbmcNCmdlbyRsb25naXR1ZGUgPC0gYXMubnVtZXJpYyhuZXdMb25nKSAgICAgICAgICAgICMgb3ZlcndyaXRlIHRoZSBsb25naXR1ZGUgdmFyaWFibGUgd2l0aCB0aGUgbmV3IG9uZQ0KDQojIENoZWNrIGZvciBkdXBsaWNhdGVkIGRhdGEgKGUuZy4sIGNpdGllcyB3aXRoIGRpZmZlcmVudCBwb3N0YWwgY29kZXMsIHRoYXQgd2UgZHJvcHBlZCk6DQogICMgZXMuIHRvIHZlcmlmeSBpdDogIHRyeSBvbiB0aGUgaW5pdGlhbCBkYXRhc2V0DQogICMgc3VtKGdlbyRub21fY29tbXVuZSA9PSAiUGFyaXMiKQ0KICAjIGluZF9kdXBsaWMgPC0gZ2VvJG5vbV9jb21tdW5lID09ICJQYXJpcyINCiAgIyBnZW9baW5kX2R1cGxpYyxdDQpzdW0oZHVwbGljYXRlZC5kYXRhLmZyYW1lKGdlbykpIA0KIyByZXRhaW5nIHVuaXF1ZSBwb3N0YWwgY2l0aWVzDQpnZW8gPC0gdW5pcXVlKGdlbywgYnkgPSAiQ09ER0VPIikNCg0KIyBjaGVjayBhZ2Fpbg0KaGVhZChnZW8pDQpzdW1tYXJ5KGdlbykNCg0KYGBgDQoNCg0KQXNzaWduIGxhdCBhbmQgbG9uZyB2YWx1ZXMgZm9yIE5BcyB1bml0czoNCmBgYHtyfQ0KcmVxdWlyZShnZ21hcCkNCg0KIyBbIGRlbGV0ZT8gXQ0KIyAjIGNvbXBhcmUgbnVtYmVycyBvZiBOQSBpbiBsYXRpdHVkZSBhbmQgbG9uZ2l0dWRlIA0KIyBzdW0oaXMubmEoZ2VvJGxhdGl0dWRlKSkgLSBzdW0oaXMubmEoZ2VvJGxvbmdpdHVkZSkpDQojICMgY2hlY2sgaWYgdGhleSBtYXRjaCBvciBub3QNCiMgc3VtKCFpcy5uYShnZW8kbGF0aXR1ZGVbaW5kTkFfTG9uZ10pKQ0KIyAjIDY0IG9icyBhcmUgbWlzc2luZyBpbiBsb25naXR1ZGUgYnV0IG5vdCBpbiBsYXRpdHVkZSwgaGVuY2UgODggdmljZSB2ZXJzYQ0KDQojIGluZGV4IG9mIE5BcyBhbmQgdGhlaXIgdG90YWwNCmluZE5BX2Nvb3JkID0gaXMubmEoZ2VvJGxhdGl0dWRlKSB8IGlzLm5hKGdlbyRsb25naXR1ZGUpDQpzdW0oaW5kTkFfY29vcmQpDQoNCiMgTk8gTU9SRSBORUVERUQgQkVDQVVTRSBJUyBBIENTViBGSUxFDQogICAgIyAjIGluaXRpYWxpemUgdmFyaWFibGVzDQogICAgIyBjaXR5X3NlYXJjaCA9IDANCiAgICAjIHJlcyA9IGFzLmRhdGEuZnJhbWUobWF0cml4KGMoMCwgMCwgMCksIDEsIDMpKQ0KICAgICMgbmFtZXMocmVzKSA9IGMoImxvbiIsICJsYXQiLCAiYWRkcmVzcyIpDQogICAgIyANCiAgICAjICMgcmV0cmlldmUgbGF0IGFuZCBsb25nIChHb29nbGUgQVBJID0gMjUwMCByZXF1ZXN0IHBlciBkYXkpDQogICAgIyAjIG15X2l0ZXIgPSBmbG9vcihzdW0oaW5kTkFfY29vcmQpLzMpDQogICAgIyBmb3IgKGkgaW4gMTpzdW0oaW5kTkFfY29vcmQpKXsNCiAgICAjIA0KICAgICMgICAjIGNpdHkgc2VhcmNoZWQNCiAgICAjICAgY2l0eV9zZWFyY2hbaV0gPSBwYXN0ZShjKGFzLmNoYXJhY3RlcihOQV9jb29yZCR0b3duX25hbWVbaV0pLCBhcy5jaGFyYWN0ZXIoTkFfY29vcmQkcG9zdGFsX2NvZGVbaV0pLCBhcy5jaGFyYWN0ZXIoTkFfY29vcmQkZGVwYXJ0bWVudFtpXSksICJGcmFuY2UiKSwgc2VwPSIgIiwgY29sbGFwc2UgPSAiLCAiKQ0KICAgICMgICANCiAgICAjICAgIyBzb2x1dGlvbg0KICAgICMgICByZXNbaSxdID0gZ2VvY29kZShjaXR5X3NlYXJjaFtpXSwgb3V0cHV0ID0gImxhdGxvbmEiLCBzb3VyY2UgPSBjKCJnb29nbGUiLCAiZHNrIiksIG1lc3NhZ2luZyA9IEZBTFNFKQ0KICAgICMgDQogICAgIyAgICMgcmV0cmlldmUgc3RpbGwgbWlzc2luZyBkYXRhLCBiZWNhdXNlIG9mIGV4aXN0aW5nIHByb2JsZW1zIHdpdGggQVBJICh1cCB0byAxNSB0cmlhbHMpDQogICAgIyAgIGogPSAwDQogICAgIyAgIHdoaWxlIChhbnkoaXMubmEocmVzW2ksXSkpICYgaiA8IDI1KXsNCiAgICAjICAgICByZXNbaSxdID0gZ2VvY29kZShjaXR5X3NlYXJjaFtpXSwgb3V0cHV0ID0gImxhdGxvbmEiLCBzb3VyY2UgPSBjKCJnb29nbGUiLCAiZHNrIiksIG1lc3NhZ2luZyA9IEZBTFNFKQ0KICAgICMgICAgIGogPSBqICsgMQ0KICAgICMgICB9DQogICAgIyB9DQoNCiAgICAjICMgY2hlY2sgdGhlIHNvbHV0aW9uDQogICAgIyBzb2wgPSBjYmluZChzZWFyY2hlZCA9IGNpdHlfc2VhcmNoLCByZXMpDQoNCiAgICAjICMgc2F2ZSBpdCBhcyBhIGNzdiBmaWxlIHRvIHNhdmUgdGltZQ0KICAgICMgd3JpdGUuY3N2KHJldHJpZXZlZF9nZW9fTkFbLDI6M10sICJnZW9fTkFfRmluYWwuY3N2IiwgcXVvdGUgPSBGQUxTRSwgcm93Lm5hbWVzPUZBTFNFLCBmaWxlRW5jb2RpbmcgPSAiVVRGLTgiKQ0KICAgIA0KDQojIHJlYWQgdGhlIGNyZWF0ZWQgY3N2DQpyZXRyaWV2ZWRfZ2VvX05BID0gcmVhZC5jc3YoImdlb19OQV9GaW5hbC5jc3YiLCBoZWFkZXIgPSBULCBlbmNvZGluZyA9ICJVVEYtOCIpDQojIGdldCBvbmx5IGxvbmcgYW5kIGxhdCBhbmQgYXNzaWduIHRvIG9yaWdpbmFsIE5BIA0KZ2VvJGxhdGl0dWRlW2luZE5BX2Nvb3JkXSA9IHJldHJpZXZlZF9nZW9fTkFbLDJdDQpnZW8kbG9uZ2l0dWRlW2luZE5BX2Nvb3JkXSA9IHJldHJpZXZlZF9nZW9fTkFbLDFdDQoNCiMgdGhlcmUgYXJlIDM3IHN0aWxsIG1pc3NpbmcgdW5pdHMsIHdoaWNoIGFyZSB0b3ducyBsb2NhdGVkIGluIG9sZCBjb2xvbmllcyBmYXIgZnJvbSBFdXJvcGUNCmluZE5BX2Nvb3JkID0gaXMubmEoZ2VvJGxhdGl0dWRlKSB8IGlzLm5hKGdlbyRsb25naXR1ZGUpDQpzdW0oaW5kTkFfY29vcmQpDQojIGV4Y2x1ZGUgdGhvc2UgdG93bnMNCmdlbyA9IGdlb1shaW5kTkFfY29vcmQsXQ0KDQpzdW1tYXJ5KGdlbykNCg0KYGBgDQoNCiMjIEVEQSAjIw0KDQpgYGB7cn0NCiNpbnN0YWxsLnBhY2thZ2VzKCJnZ3Bsb3QyIikNCiNpbnN0YWxsLnBhY2thZ2VzKCJnZ21hcCIpDQpyZXF1aXJlKGdncGxvdDIpDQpyZXF1aXJlKGdnbWFwKQ0KDQojIHBsb3QgZnJhbmNlIChjZW50ZXI6IDIuMjEzNzQ5IDQ2LjIyNzYzOCkNCiMgZnJhX2NlbnRlciA9IGFzLm51bWVyaWMoZ2VvY29kZSgiRnJhbmNlIikpDQpmcmFfY2VudGVyID0gYygyLjIxMzc0OSwgNDYuMjI3NjM4KQ0KRnJhTWFwID0gZ2dtYXAoZ2V0X2dvb2dsZW1hcChjZW50ZXI9ZnJhX2NlbnRlciwgc2NhbGU9Miwgem9vbT01KSwgZXh0ZW50PSJub3JtYWwiKQ0KRnJhTWFwDQoNCiMgcGxvdCBhbGwgdG93bnMgYXZhaWxhYmxlDQpnZW9fcG9zID0gYXMuZGF0YS5mcmFtZShjYmluZChsb24gPSBnZW8kbG9uZ2l0dWRlLCBsYXQgPSBnZW8kbGF0aXR1ZGUpKQ0KZ2VvX3BvcyA9IGdlb19wb3NbY29tcGxldGUuY2FzZXMoZ2VvX3BvcyksXQ0KRnJhTWFwICsNCiAgZ2VvbV9wb2ludChhZXMoeD1sb24sIHk9bGF0KSwgZGF0YT1nZW9fcG9zLCBjb2w9Im9yYW5nZSIsIGFscGhhPTAuMSkgDQoNCiMgZGVsZXRlIG5vbi1FdXJvcGVhbiBjb3VudHJpZXMNCmluZF9ub25FdXIgPSBnZW8kbGF0aXR1ZGUgPCAzMCB8IGdlbyRsYXRpdHVkZSA+IDcwIHxnZW8kbG9uZ2l0dWRlIDwgLTIwIHwgZ2VvJGxvbmdpdHVkZSA+IDIwDQpzdW0oaW5kX25vbkV1cikNCmdlbyA9IGdlb1shaW5kX25vbkV1cixdDQoNCiMgcGxvdCBhbGwgRXVyb3BlYW4gdG93bnMgYXZhaWxhYmxlDQpnZW9fcG9zID0gYXMuZGF0YS5mcmFtZShjYmluZChsb24gPSBnZW8kbG9uZ2l0dWRlLCBsYXQgPSBnZW8kbGF0aXR1ZGUpKQ0KZ2VvX3BvcyA9IGdlb19wb3NbY29tcGxldGUuY2FzZXMoZ2VvX3BvcyksXQ0KRnJhTWFwICsNCiAgZ2VvbV9wb2ludChhZXMoeD1sb24sIHk9bGF0KSwgZGF0YT1nZW9fcG9zLCBjb2w9Im9yYW5nZSIsIGFscGhhPTAuMSkgDQoNCmBgYA0KDQoNCg0KIyMgV2hhdCB3ZSBoYXZlIGxlYXJuZWQgIyMNCg0KU29sdmVkOiANCg0KKiBXaHkgbGF0aXR1ZGUgaXMgbWlzc2luZyBhbmQgbm90IGxvbmdpdHVkZT8NCiogVGhlcmUgYXJlIHNvbWUgZHVwbGljYXRpb25zLg0KDQpUbyBkbzoNCg0KKiBXaGF0IHRvIGRvIHdpdGggbm9uLUV1cm9wZWFuIHRvd25zPw0KDQoNCg0KIyMgSG93IHRvIHVzZSB0aGVzZSBkYXRhICMjDQoNCiogQ29tcGFyZSBFdXJvcGVhbiB0b3ducyB2cy4gb2xkIGNvbG9uaWVzPw0KKiBVc2VmdWwgZm9yIGFsbCBkYXRhc2V0cy9hbmFseXNlcw0KDQoNCiMgQW5hbHl6ZSBzYWxhcnkgZGF0YSAjDQoNCiMjIFByZS1wcm9jZXNzaW5nICMjDQoNCmBgYHtyfQ0KIyBwcmVsaW1pbmFyeSBjaGVja3MNCmRpbShzYWxhcnkpDQpuYW1lcyhzYWxhcnkpDQpoZWFkKHNhbGFyeSkNCnN0cihzYWxhcnkpDQpzdW1tYXJ5KHNhbGFyeSkNCg0KIyBEcm9wIHVubmVjZXNzYXJ5IGNvbHVtbnMgKHRvd24gbmFtZSByZXBlYXRzIGluIG90aGVyIHRhYmxlLCBpcyBpdCBzdXJlbHkgcG9zc2libGUgdG8gbWVyZ2UgdGhlbT8pDQpuYW1lcyhzYWxhcnkpDQojIHNhbGFyeSA8LSBzdWJzZXQoc2FsYXJ5LCBzZWxlY3QgPSAtYyh0b3duKSkNCg0KIyBDb252ZXJ0IENPREdFTyB0byBudW1lcmljDQpzYWxhcnkkQ09ER0VPIDwtIGFzLm51bWVyaWMoYXMuY2hhcmFjdGVyKHNhbGFyeSRDT0RHRU8pKQ0KDQojIENoZWNrIGZvciBkdXBsaWNhdGVkIGRhdGENCnN1bShkdXBsaWNhdGVkLmRhdGEuZnJhbWUoc2FsYXJ5KSkNCg0KDQpgYGANCg0KDQojIyBFREEgIyMNCg0KVW5pdmFyaWF0ZSBhbmFseXNpcyBjb21wYXJpbmcgdmFyaW91cyBqb2IgY2F0ZWdvcmllcyBmb3IgYm90aCBnZW5kZXJzOiAgDQpgYGB7cn0NCg0KcmVxdWlyZShnZ3Bsb3QyKQ0KDQojICBudW1iZXIgb2YgdW5pdHMNCm5fc2V4IDwtIGxlbmd0aChzYWxhcnkkc2FsX0ZlbWFsZXMpDQoNCiMgdmVjdG9yIHJlcHJlc2VudGluZyBtYWxlcyBhbmQgZmVtYWxlcw0KTGFiZWwgPC0gYyhyZXAoIk0iLCBuX3NleCo1KSwgcmVwKCJGIiwgbl9zZXgqNSkpDQoNCiMgdmVjdG9yIHJlcHJlc2VudGluZyB0aGUgdmFyaWFibGUgY29uc2lkZXJlZA0KVmFyaWFibGUgPC0gYyhyZXAoIkdlbmVyYWwiLCBuX3NleCksIA0KICAgICAgICAgICAgIHJlcCgiRXhlY3V0aXZlIiwgbl9zZXgpLA0KICAgICAgICAgICAgIHJlcCgiTWlkTWFuYWdlciIsIG5fc2V4KSwNCiAgICAgICAgICAgICByZXAoIkVtcGxveWVlIiwgbl9zZXgpLA0KICAgICAgICAgICAgIHJlcCgiV29ya2VyIixuX3NleCksDQogICAgICAgICAgICAgcmVwKCJHZW5lcmFsIiwgbl9zZXgpLCANCiAgICAgICAgICAgICByZXAoIkV4ZWN1dGl2ZSIsIG5fc2V4KSwNCiAgICAgICAgICAgICByZXAoIk1pZE1hbmFnZXIiLCBuX3NleCksDQogICAgICAgICAgICAgcmVwKCJFbXBsb3llZSIsIG5fc2V4KSwNCiAgICAgICAgICAgICByZXAoIldvcmtlciIsbl9zZXgpKQ0KDQojIG1lcmdlIHRoZXNlIGRhdGENCnNhbF9zZXggPSBjYmluZC5kYXRhLmZyYW1lKExhYmVsID0gTGFiZWwsIA0KICAgICAgICAgICAgIHZhbHVlID0gYyhzYWxhcnkkc2FsX01hbGVzLCBzYWxhcnkkc2FsX01fZXhlY3V0aXZlLCBzYWxhcnkkc2FsX01fbWlkTWFuYWdlciwgc2FsYXJ5JHNhbF9NX2VtcGxveWVlLCBzYWxhcnkkc2FsX01fd29ya2VyLA0KICAgICAgICAgICAgICAgICAgICAgICBzYWxhcnkkc2FsX0ZlbWFsZXMsIHNhbGFyeSRzYWxfRl9leGVjdXRpdmUsIHNhbGFyeSRzYWxfRl9taWRNYW5hZ2VyLCBzYWxhcnkkc2FsX0ZfZW1wbG95ZWUsIHNhbGFyeSRzYWxfRl93b3JrZXIpLA0KICAgICAgICAgICAgIFZhcmlhYmxlID0gVmFyaWFibGUpDQoNCiMgcGxvdHRpbmcgcGhhc2UNCnAgPC0gIGdncGxvdChkYXRhID0gc2FsX3NleCwgYWVzKHg9TGFiZWwsIHk9dmFsdWUpKSArDQogICAgICBnZW9tX2JveHBsb3QoYWVzKGZpbGwgPSBMYWJlbCkpICsNCiAgICAgICMgbm90IGNvbG9yIHBvaW50cyByZXBsYWNpbmcgY29sb3VyID0gZ3JvdXAgaW5zdGVhZCBvZiBjb2xvdXI9TGFiZWwNCiAgICAgIGdlb21fcG9pbnQoYWVzKHk9dmFsdWUsIGNvbG91cj1MYWJlbCksIHBvc2l0aW9uID0gcG9zaXRpb25fZG9kZ2Uod2lkdGg9MC43NSkpICsNCiAgICAgIGZhY2V0X3dyYXAoIH4gVmFyaWFibGUsIHNjYWxlcz0iZnJlZSIpICsNCiAgICAgIHhsYWIoIngtYXhpcyIpICsgeWxhYigieS1heGlzIikgKyBnZ3RpdGxlKCJHZW5kZXIgY29tcGFyaXNvbiIpICsNCiAgICAgIHN0YXRfYm94cGxvdChnZW9tID0gImVycm9yYmFyIiwgd2lkdGggPSAwLjUpDQogICAgICAjIHAgPC0gcCArIGd1aWRlcyhmaWxsPWd1aWRlX2xlZ2VuZCh0aXRsZT0iTGVnZW5kIikpDQpwDQoNCiMgZXhjbHVkaW5nIG91dGxpZXJzDQpwMiA8LSBnZ3Bsb3QoZGF0YSA9IHNhbF9zZXgsIGFlcyh4PUxhYmVsLCB5PXZhbHVlKSkgKw0KICAgICAgc2NhbGVfeV9jb250aW51b3VzKGxpbWl0cyA9IHF1YW50aWxlKHNhbF9zZXgkdmFsdWUsIGMoMCwgMC45KSkpICsNCiAgICAgIGdlb21fYm94cGxvdChhZXMoZmlsbCA9IExhYmVsKSkgKw0KICAgICAgIyBub3QgY29sb3IgcG9pbnRzIHJlcGxhY2luZyBjb2xvdXIgPSBncm91cCBpbnN0ZWFkIG9mIGNvbG91cj1MYWJlbA0KICAgICAgZ2VvbV9wb2ludChhZXMoeT12YWx1ZSwgY29sb3VyPUxhYmVsKSwgcG9zaXRpb24gPSBwb3NpdGlvbl9kb2RnZSh3aWR0aD0wLjc1KSkgKw0KICAgICAgZmFjZXRfd3JhcCggfiBWYXJpYWJsZSwgc2NhbGVzPSJmcmVlIikgKw0KICAgICAgeGxhYigieC1heGlzIikgKyB5bGFiKCJ5LWF4aXMiKSArIGdndGl0bGUoIkdlbmRlciBjb21wYXJpc29uIGV4Y2x1ZGluZyB0aGUgbGFzdCBkZWNpbGUiKSArDQogICAgICAjIHAgPC0gcCArIGd1aWRlcyhmaWxsPWd1aWRlX2xlZ2VuZCh0aXRsZT0iTGVnZW5kIikpDQogICAgICBzdGF0X2JveHBsb3QoZ2VvbSA9ICJlcnJvcmJhciIsIHdpZHRoID0gMC41KQ0KcDINCmBgYA0KDQoNCg0KSGlnaGxpZ2h0IGJpdmFyaWF0ZSByZWxhdGlvbnMgdXNpbmcgc2NhdHRlciBtYXRyaWNlczoNCmBgYHtyfQ0KIyBtb3N0IGdlbmVyYWwgcGFpcnMNCnBhaXJzKHNhbGFyeVtjKDM6OCwgMTMsIDE4OjIwKV0pDQojIHBhaXJzIGhpZ2hsaWdodGluZyBnZW5kZXJzJyBkaWZmZXJlbmNlcw0KcGFpcnMoc2FsYXJ5W2MoOToxMiwgMTQ6MTcpXSkNCmBgYA0KDQpGaXQgYSByZWdyZXNzaW9uIG1vZGVsIHRvIHByZWRpY3QgdGhlIHNhbGFyaWVzIG9mIHBlb3BsZSBpbiBhZ2UgMjYtNTAgdXNpbmcgYXMgcmVncmVzc29yIDUxKyB5ZWFyczoNCmBgYHtyLCBmaWcua2VlcD0nYWxsJ30NCg0KIyBmaXQgYW5kIHNob3cgT0xTIGVzdGltYXRlDQpwbG90KHNhbGFyeSRzYWxfMjZfNTAgfiBzYWxhcnkkc2FsXzUxcGx1cykNCmZpdF9MTV8yNl81MCA9IGdsbShzYWxhcnkkc2FsXzI2XzUwIH4gc2FsYXJ5JHNhbF81MXBsdXMsIGRhdGEgPSBzYWxhcnkpDQphYmxpbmUoZml0X0xNXzI2XzUwLCBsd2Q9MywgY29sPSJyZWQiKQ0KDQojIGRpYWdub3N0aWNzDQpzdW1tYXJ5KGZpdF9MTV8yNl81MCkNCnBsb3QoZml0X0xNXzI2XzUwKQ0KYGBgDQoNClNhbWUgYXMgYmVmb3JlIGJ1dCBhZGRpbmcgcG9seW5vbWlhbHMgd2hpY2ggYXJlIGV2YXVhdGVkIHVzaW5nIDEwLWZvbGRzIGNyb3NzIHZhbGlkYXRpb246DQoNCmBgYHtyLCBmaWcua2VlcD0nYWxsJ30NCg0KcmVxdWlyZShib290KQ0Kc2V0LnNlZWQoMSkNCg0KIyBrLUZvbGQgQ3Jvc3MtVmFsaWRhdGlvbg0KY3YuZXJyLksgPSByZXAoMCwgNSkNCmN2LmVyci5LID0gcmJpbmQoY3YuZXJyLkssIGN2LmVyci5LKQ0KZm9yIChpIGluIDE6NSl7DQogIGZpdF9MTV8yNl81MC5LID0gZ2xtKHNhbF8yNl81MCB+IHBvbHkoc2FsXzUxcGx1cywgaSksIGRhdGEgPSBzYWxhcnkpDQogIGN2LmVyci5LWyxpXSA9IGN2LmdsbShzYWxhcnksIGZpdF9MTV8yNl81MC5LLCBLID0gMTApJGRlbHRhWzFdDQp9DQoNCiMgcGxvdHRpbmcgcmVzdWx0cw0KcGxvdChjdi5lcnIuS1sxLF0sIHR5cGUgPSAnbCcsIGNvbCA9ICdyZWQnLCB4bGFiID0gIlBvbHlub21pYWxzJyBvcmRlciIsIA0KICAgICB5bGFiID0gIjEwLWZvbGRzIENWIiwgbWFpbiA9ICJDViBhbmQgYWRqdXN0ZWQgQ1YgZm9yIGRpZmZlcmVudCBwb2x5bm9taWFscyIpDQpsaW5lcyhjdi5lcnIuS1syLF0sIGNvbCA9ICdncmVlbicpDQpwb2ludHMod2hpY2gubWluKGN2LmVyci5LKSwgY3YuZXJyLktbMSwgd2hpY2gubWluKGN2LmVyci5LKV0sIGNvbCA9ICJyZWQiLCBjZXg9MiwgcGNoPTIwKQ0KbGVnZW5kKCd0b3ByaWdodCcsIGxlZ2VuZCA9IGMoJ0NWJywgJ0Fkai4gQ1YnKSwgY29sID0gYygncmVkJywgJ2dyZWVuJyksIHBjaCA9IDEwKQ0KDQpgYGANCg0KDQpQcmVkaWN0aW5nIHNhbF9leGVjdXRpdmUgd2l0aCA1IHJlZ3Jlc3NlcnMgdXNpbmcgTGFzc28gd2l0aCBDViBhbmQgMTAtZm9sZHMgQ1Y6DQpbIFBST0JMRU0gRk9SIENPTExJTkVBUklUWT8gdXNlIHJpZGdlP10gWyBhZGQgcG9seW5vbWlhbHM/IF0NCmBgYHtyLCBmaWcua2VlcD0nYWxsJ30NCg0KcmVxdWlyZShnbG1uZXQpDQpzZXQuc2VlZCgxKQ0KDQojIGNyYXRlIGFuIFggbWF0cml4IGV4Y2x1ZGluZyBpbnRlcmNlcHQNCiMgeCA9IGNiaW5kKHNhbGFyeSRzYWxfbWlkTWFuYWdlciwgc2FsYXJ5JHNhbF9lbXBsb3llZSwgc2FsYXJ5JHNhbF93b3JrZXIsIHNhbGFyeSRzYWxfMThfMjUsIHNhbGFyeSRzYWxfNTFwbHVzKQ0KIyB5ID0gc2FsYXJ5JHNhbF9leGVjdXRpdmUNCiMgcHJvYmFibHkgZm9yIHJpZGdlDQpuYW1lcyhzYWxhcnkpDQp5ID0gc2FsYXJ5JHNhbF8yNl81MA0KeCA9IGFzLm1hdHJpeChjYmluZChzYWxhcnlbLCBjKDQ6OCwgMTMsIDE4LCAyMCldKSkNCm5hbWVzKHgpDQoNCiMgZ3JpZCBmb3IgbGFtYmRhIHZhbHVlcw0KZ3JpZCA9IDEwXnNlcSgxLCAtNSwgbGVuZ3RoID0gMTAwKQ0KbGFzc28ubW9kID0gZ2xtbmV0KHgsIHksIGFscGhhID0gMSwgbGFtYmRhID0gZ3JpZCkNCmRpbShjb2VmKGxhc3NvLm1vZCkpDQojIHRoZSBub3JtcyBhcmUgaW5jcmVhc2luZyBpbiB2YWx1ZSBiZWNhdXNlIG9mIHRoZSBzaHJpbmthZ2UNCmxhc3NvLm1vZCRsYW1iZGFbMV0gICAgICAgICAgICAgICAgICAjIGxhbWJkYSB2YWx1ZQ0Kc3FydChzdW0oY29lZihsYXNzby5tb2QpWy0xLDFdXjIpKSAgICMgTDIgbm9ybSBvZiBpdHMgY29lZmYNCmxhc3NvLm1vZCRsYW1iZGFbNTFdDQpzcXJ0KHN1bShjb2VmKGxhc3NvLm1vZClbLTEsNTFdXjIpKQ0KbGFzc28ubW9kJGxhbWJkYVs5MF0NCnNxcnQoc3VtKGNvZWYobGFzc28ubW9kKVstMSw5MF1eMikpDQojIHByZWRpY3QgdmFsdWVzIGZvciBhIG5ldyBsYW1iZGEsIGUuZy4gT0xTDQpPTFMgPSBwcmVkaWN0KGxhc3NvLm1vZCwgcyA9IDAsIHR5cGUgPSAiY29lZmZpY2llbnRzIilbMTpucm93KGNvZWYobGFzc28ubW9kKSksXQ0KDQojIHNwbGl0IHRoZSBkYXRlIGxlYXZpbmcgdGhlIDEwJSBmb3IgQ1YNCnRyYWluID0gc2FtcGxlKDE6bnJvdyhzYWxhcnkpLCBmbG9vcihucm93KHNhbGFyeSkqMC45KSkNCnRlc3QgPSAtdHJhaW4NCnkudGVzdCA9IHlbdGVzdF0NCmxhc3NvLm1vZCA9IGdsbW5ldCh4W3RyYWluLF0sIHlbdHJhaW5dLCBhbHBoYSA9IDEsIGxhbWJkYSA9IGdyaWQsIHRocmVzaCA9IDFlLTEyKQ0KZXJyLmkgPSByZXAoIk5BIiwgbGVuZ3RoKGdyaWQpKQ0KZm9yIChpIGluIDE6bGVuZ3RoKGdyaWQpKXsNCiAgbGFzc28ucHJlZCA9IHByZWRpY3QobGFzc28ubW9kLCBzID0gZ3JpZFtpXSwgbmV3eCA9IHhbdGVzdCxdKQ0KICBlcnIuaVtpXSA9IG1lYW4oKGxhc3NvLnByZWQgLSB5LnRlc3QpXjIpDQp9DQpwbG90KGxvZyhncmlkKSwgZXJyLmksIHhsYWIgPSAnbG9nIExhbWJkYScsIHlsYWIgPSAndGVzdCBzZXQgTVNFJywgDQogICAgIG1haW4gPSAnVGVzdCBNU0UgYW1vbmcgZGlmZmVyZW50IExhbWJkYXMnLCB5bGltID0gYygwLCAxMCkpDQpiZXN0bGFtID0gZ3JpZFt3aGljaC5taW4oZXJyLmkpXQ0KcG9pbnRzKGxvZyhncmlkKVtiZXN0bGFtXSwgZXJyLmlbYmVzdGxhbV0sIGNvbCA9InJlZCIsIGNleD0yLCBwY2g9MjApDQojIGhpZ2ggdmFsdWVzIG9mIGxhbWJkYSBhcmUgbGlrZSBmaXR0aW5nIGp1c3QgdGhlIGludGVyY2VwdA0KDQojIHVzaW5nIDEwIGZvbGRzIENWDQpzZXQuc2VlZCAoMSkNCmN2Lm91dCA9IGN2LmdsbW5ldCh4W3RyYWluICxdLCB5W3RyYWluXSwgYWxwaGEgPSAxKQ0KcGxvdChjdi5vdXQpDQpiZXN0bGFtID0gY3Yub3V0JGxhbWJkYS5taW4NCmJlc3RsYW0NCmxhc3NvLnByZWQgPSBwcmVkaWN0KGxhc3NvLm1vZCwgcz1iZXN0bGFtLCBuZXd4PXhbdGVzdCAsXSkNCm1lYW4oKGxhc3NvLnByZWQgLSB5LnRlc3QpXjIpDQoNCg0KIyB1c2luZyBiZXN0IHN1YnNldA0KcmVxdWlyZShsZWFwcykNCmRhdGFCUyA9IGFzLmRhdGEuZnJhbWUoY2JpbmQoeSwgeCkpDQpiZXN0LnN1YiA9IHJlZ3N1YnNldHMoeSB+IHgsIGRhdGEgPSBkYXRhQlMsIG52bWF4ID0gbnJvdyhjb2VmKGxhc3NvLm1vZCkpKQ0KYmVzdC5zdWIuc3VtbWFyeSA9IHN1bW1hcnkoYmVzdC5zdWIpDQpuYW1lcyhiZXN0LnN1Yi5zdW1tYXJ5KQ0KIyBtYW51YWwgcGxvdHRpbmcNCnBhcihtZnJvdyA9YygyLDIpKQ0KIyByc3ENCnBsb3QoYmVzdC5zdWIuc3VtbWFyeSRyc3EgLCB4bGFiPSJOdW1iZXIgb2YgVmFyaWFibGVzIiwgeWxhYj0iUnNxIiwgdHlwZT0ibCIpDQppbmRfUnNxID0gd2hpY2gubWF4KGJlc3Quc3ViLnN1bW1hcnkkcnNxKQ0KcG9pbnRzKGluZF9Sc3EsIGJlc3Quc3ViLnN1bW1hcnkkYWRqcjJbaW5kX1JzcV0sIGNvbCA9InJlZCIsIGNleD0yLCBwY2g9MjApDQojIGFkalJzcQ0KcGxvdChiZXN0LnN1Yi5zdW1tYXJ5JGFkanIyICx4bGFiPSJOdW1iZXIgb2YgVmFyaWFibGVzIiwgeWxhYj0iQWRqdXN0ZWQgUlNxIiwgdHlwZT0ibCIpDQppbmRfYWRqUnNxID0gd2hpY2gubWF4KGJlc3Quc3ViLnN1bW1hcnkkYWRqcjIpDQpwb2ludHMoaW5kX2FkalJzcSwgYmVzdC5zdWIuc3VtbWFyeSRhZGpyMltpbmRfYWRqUnNxXSwgY29sID0icmVkIiwgY2V4PTIsIHBjaD0yMCkNCiMgQ3ANCnBsb3QoYmVzdC5zdWIuc3VtbWFyeSRjcCAseGxhYj0iTnVtYmVyIG9mIFZhcmlhYmxlcyIsIHlsYWI9IkNwIiwgdHlwZT0ibCIpDQppbmRfQ3AgPSB3aGljaC5taW4oYmVzdC5zdWIuc3VtbWFyeSRjcCkNCnBvaW50cyhpbmRfQ3AsIGJlc3Quc3ViLnN1bW1hcnkkY3BbaW5kX2FkalJzcV0sIGNvbCA9InJlZCIsIGNleD0yLCBwY2g9MjApDQojIGJpYw0KcGxvdChiZXN0LnN1Yi5zdW1tYXJ5JGJpYyAseGxhYj0iTnVtYmVyIG9mIFZhcmlhYmxlcyIsIHlsYWI9ImJpYyIsIHR5cGU9ImwiKQ0KaW5kX2JpYyA9IHdoaWNoLm1pbihiZXN0LnN1Yi5zdW1tYXJ5JGJpYykNCnBvaW50cyhpbmRfYmljLCBiZXN0LnN1Yi5zdW1tYXJ5JGJpY1tpbmRfYmljXSwgY29sID0icmVkIiwgY2V4PTIsIHBjaD0yMCkNCiMgYnVpbHQtaW4gcGxvdHMNCj9wbG90LnJlZ3N1YnNldHMNCnBhcihtZnJvdz1jKDEsMSkpDQpwbG90KGJlc3Quc3ViLCBzY2FsZSA9ICJyMiIpDQpwbG90KGJlc3Quc3ViLCBzY2FsZSA9ICJhZGpyMiIpDQpwbG90KGJlc3Quc3ViLCBzY2FsZSA9ICJDcCIpDQpwbG90KGJlc3Quc3ViLCBzY2FsZSA9ICJiaWMiKQ0KIyByZXRyaWV2ZSB0aGUgbW9kZWwgd2l0aCBtaW4gQklDDQpjb2VmZmljaWVudHMoYmVzdC5zdWIsIHdoaWNoLm1pbihiZXN0LnN1Yi5zdW1tYXJ5JGJpYykpDQoNCmBgYA0KDQoNCg0KDQpNb2RlbCBzYWxhcmllcyBmb3IgcGVvcGxlIGFnZWQgMTgtMjUsIHdoaWNoIHNlZW1zIG1vcmUgZGlmZmljdWx0IHRvIHByZWRpY3Q6DQpgYGB7ciwgZmlnLmtlZXA9J2FsbCd9DQoNCnBsb3QoeSA9IHNhbGFyeSRzYWxfMThfMjUsIHggPSBzYWxhcnkkc2FsXzUxcGx1cykNCiMgdGhlcmUgaXMgYSBjbGVhciBvdXRsaWVyDQppbmRfb3V0ID0gd2hpY2goc2FsYXJ5JHNhbF8xOF8yNSA9PSBtYXgoc2FsYXJ5JHNhbF8xOF8yNSkpDQojIGNoZWNrIGlmIGlzIHNvIGluIGFsbCBkaW1lbnNpb25zIChhcHBhcmVudGx5IG5vdCkNCmNvbF9jb2wgPC0gcmVwKCJibGFjayIsIG5yb3coc2FsYXJ5KSkNCmNvbF9jb2xbaW5kX291dF0gPC0gInJlZCINCnBhaXJzKHNhbGFyeVssIGMoMzo4LCAxMywgMTg6MjApXSwgY29sID0gY29sX2NvbCkNCg0KIyBldmFsdWF0ZSBhbiBPTFMgZml0DQpmaXRfTE1fMThfMjUgPSBsbShzYWxhcnkkc2FsXzE4XzI1IH4gc2FsYXJ5JHNhbF81MXBsdXMpDQpwbG90KHkgPSBzYWxhcnkkc2FsXzE4XzI1LCB4ID0gc2FsYXJ5JHNhbF81MXBsdXMpDQphYmxpbmUoZml0X0xNXzE4XzI1LCBsd2Q9MywgY29sPSJyZWQiKQ0KDQojICMgdXNpbmcgbW9yZSBwcmVkaWN0b3JzDQojIGZpdF9MTV8yID0gbG0oc2FsYXJ5JHNhbF9NXzE4XzI1IH4gc2FsYXJ5JHNhbF8yNl81MCArIHNhbGFyeSRzYWxfNTFwbHVzICsgc2FsYXJ5JHNhbF9nZW5lcmFsICsgc2FsYXJ5JHNhbF9leGVjdXRpdmUgKyANCiMgICAgICAgICAgICAgICAgIHNhbGFyeSRzYWxfbWlkTWFuYWdlciArIHNhbGFyeSRzYWxfZW1wbG95ZWUgKyBzYWxhcnkkc2FsX3dvcmtlcikNCiMgDQojIHN1bW1hcnkoZml0X0xNXzIpDQpgYGANCg0KUENBIGZvciBzYWxhcnk6DQpgYGB7cn0NCm15UHIgPC0gcHJjb21wKHNhbGFyeVssIDM6MjZdLCBzY2FsZSA9IFRSVUUpDQpteVByDQpzdW1tYXJ5KG15UHIpDQpwbG90KG15UHIsIHR5cGUgPSAibCIpDQpiaXBsb3QobXlQciwgc2NhbGUgPSAwLCBjZXggPSAwLjUpDQpzdHIobXlQcikNCiNteVByJHggI2NoZWNraW5nIHByaW5jaXBhbCBjb21wb25lbnQgc2NvcmVzDQpzYWxhcnkyIDwtIGNiaW5kKHNhbGFyeSwgbXlQciR4WywgMToyXSkNCmhlYWQoc2FsYXJ5MikNCiNwbG90IHdpdGggZ2dwbG90Li4uDQojcmVxdWlyZShnZ3Bsb3QyKQ0KZ2dwbG90KHNhbGFyeTIsIGFlcyhQQzEsIFBDMikpICsgDQogIHN0YXRfZWxsaXBzZShnZW9tID0gInBvbHlnb24iLCBjb2wgPSAiYmxhY2siLCBhbHBoYSA9IDAuNSkgKyANCiAgZ2VvbV9wb2ludChzaGFwZSA9IDIxLCBjb2wgPSAiYmxhY2siKQ0KIyBjb3JyZWxhdGlvbnMgYmV0d2VlbiB2YXJpYWJsZXMgYW5kIFBDcy4uLg0KY29yKHNhbGFyeVssIDM6MjZdLCBzYWxhcnkyWywyNzoyOF0pDQpgYGANCg0KDQojIyBXaGF0IHdlIGhhdmUgbGVhcm5lZCAjIw0KDQoqIENoZWNrIHRoZSBvdXRsaWVyIGluIHNhbF8xOF8yNTogd2hpY2ggY2l0eSBpcyBpdCwgZXRjLg0KKiBldmFsdWF0ZSBwb3NzaWJsZSBtdWx0aWNvbGxpbmVhcml0eQ0KKiBUcnkgYSByZWdyZXNzaW9uIHdpdGggYWxsIHByZWRpY3RvcnMgYW5zIGxhc3NvDQoNCiMjIEhvdyB0byB1c2UgdGhlc2UgZGF0YSAjIw0KDQoqIC4uLg0KDQoNCiMgQW5hbHl6ZSBwb3B1bGF0aW9uIGRhdGEgIw0KDQojIyBQcmUtcHJvY2Vzc2luZyAjIw0KDQoNCmBgYHtyfQ0KIyBwcmVsaW1pbmFyeSBjaGVja3MNCm5hbWVzKHBvcHVsYXRpb24pDQpzdW1tYXJ5KHBvcHVsYXRpb24pDQoNCiMgRHJvcCB1bm5lY2Vzc2FyeSBjb2x1bW5zIChOSVZHRU8gaXMgdGhlIHNhbWUgZm9yIGFsbCkNCnBvcHVsYXRpb24gPC0gc3Vic2V0KHBvcHVsYXRpb24sIHNlbGVjdCA9IC1jKE5JVkdFTywgTElCR0VPKSkNCg0KIyBjb252ZXJ0aW5nIENPREdFTyB0byBudW1lcmljDQpwb3B1bGF0aW9uJENPREdFTyA8LSBhcy5udW1lcmljKHBvcHVsYXRpb24kQ09ER0VPKQ0KDQojIFJlZmFjdG9yIHNleCBhbmQgTU9DTw0KcG9wdWxhdGlvbiRNT0NPIDwtIGZhY3Rvcihwb3B1bGF0aW9uJE1PQ08sIGxldmVscyA9IGMoMTEsMTIsMjEsMjIsMjMsMzEsMzIpLA0KICAgICAgICAgICAgICAgICAgICAgICAgICBsYWJlbHMgPSBjKCJjaGlsZHJlbl9saXZpbmdfd2l0aF90d29fcGFyZW50cyIsICJjaGlsZHJlbiBsaXZpbmcgd2l0aCBvbmUgcGFyZW50IiwNCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAiYWR1bHRzX2xpdmluZ19pbl9jb3VwbGVfd2l0aG91dF9jaGlsZCIsICJhZHVsdHNfbGl2aW5nX2luX2NvdXBsZV93aXRoX2NoaWxkcmVuIiwNCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAiYWR1bHRzX2xpdmluZ19hbG9uZV93aXRoX2NoaWxkcmVuIiwicGVyc29ucyBub3QgZnJvbSBmYW1pbHkgbGl2aW5nIGluIHRoZSBob21lIiwNCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAicGVyc29uc19saXZpbmdfYWxvbmUiKSkNCnBvcHVsYXRpb24kc2V4IDwtIGZhY3Rvcihwb3B1bGF0aW9uJHNleCwgbGV2ZWxzID0gYygxLDIpLCBsYWJlbHMgPSBjKCJNYWxlIiwgIkZlbWFsZSIpKQ0KaGVhZChwb3B1bGF0aW9uKQ0KIyBUYWtlIG91dCByb3dzIHdpdGggTkIgKG51bWJlciBvZiBwZW9wbGUgaW4gdGhpcyBjYXRlZ29yeSkgZXF1YWwgdG8gMA0KcG9wdWxhdGlvbiA8LSBwb3B1bGF0aW9uW3BvcHVsYXRpb24kcGVvcGxlQ2F0ZWdOdW0gIT0gMCxdDQoNCmhlYWQocG9wdWxhdGlvbikNCnN1bW1hcnkocG9wdWxhdGlvbikNCg0KDQpgYGANCg0KIyMgRURBICMjDQpgYGB7cn0NCiNDb21wYXJlIGFnZSBjYXRlZ29yaWVzOg0KbGlicmFyeShnZ3Bsb3QyKQ0KIyAgbnVtYmVyIG9mIHVuaXRzDQpuX2NhdCA8LSBsZW5ndGgocG9wdWxhdGlvbiRDT0RHRU8pDQojIGV4dHJhY3QgdW5pcXVlIGNhdGVnb3JpZXMNCnVuaXFfY2F0IDwtIHVuaXF1ZShwb3B1bGF0aW9uJGFnZUNhdGVnNSE9NSkNCiMgdmVjdG9yIHJlcHJlc2VudGluZyBzZXggZm9yIGVhY2ggY2F0ZWdvcnkNCkxhYmVsIDwtIGMocmVwKGMoJ01hbGUnLCAnRmVtYWxlJyksIG5fY2F0KSkNCiMgdmVjdG9yIHJlcHJlc2VudGluZyB0aGUgdmFyaWFibGUgY29uc2lkZXJlZA0KVmFyaWFibGUgPC0gcmVwKHVuaXFfY2F0LCBuX2NhdC9sZW5ndGgodW5pcV9jYXQpKQ0KVmFsdWU9cG9wdWxhdGlvbiRwZW9wbGVDYXRlZ051bQ0KIyBtZXJnZSB0aGVzZSBkYXRhDQojcG9wX2NhdGVnID0gY2JpbmQuZGF0YS5mcmFtZShMYWJlbCA9IExhYmVsLCANCiMgICAgICAgICAgICAgdmFsdWUgPSBWYWx1ZSwNCiMgICAgICAgICAgICAgVmFyaWFibGUgPSBWYXJpYWJsZSkNCiNwIDwtIGdncGxvdChkYXRhID0gcG9wX2NhdGVnLCBhZXMoeD1MYWJlbCwgeT12YWx1ZSkpIA0KI3AgPC0gcCArIGdlb21fYm94cGxvdChhZXMoZmlsbCA9IExhYmVsKSkNCiMgaWYgeW91IHdhbnQgY29sb3IgZm9yIHBvaW50cyByZXBsYWNlIGdyb3VwIHdpdGggY29sb3VyPUxhYmVsDQojcCA8LSBwICsgZ2VvbV9wb2ludChhZXMoeT12YWx1ZSwgY29sb3VyPUxhYmVsKSwgcG9zaXRpb24gPSBwb3NpdGlvbl9kb2RnZSh3aWR0aD0wLjc1KSkNCiNwIDwtIHAgKyBmYWNldF93cmFwKCB+IFZhcmlhYmxlLCBzY2FsZXM9ImZyZWUiKQ0KI3AgPC0gcCArIHhsYWIoIngtYXhpcyIpICsgeWxhYigieS1heGlzIikgKyBnZ3RpdGxlKCJDYXRlZ29yeSBjb21wYXJpc29uIikNCiMgcCA8LSBwICsgZ3VpZGVzKGZpbGw9Z3VpZGVfbGVnZW5kKHRpdGxlPSJMZWdlbmQiKSkNCiNwDQpgYGANCg0KYGBge3J9DQojIFJlc3RydWN0dXJlIHBvcHVsYXRpb24gZGF0YSB0byBwcm9kdWNlIHRoZSBkZW1vZ3JhcGhpYyBwcm9maWxlIHBlciB0b3duDQojIGluc3RhbGwucGFja2FnZXMoInBseXIiKQ0KbGlicmFyeShwbHlyKQ0KcG9wdWxhdGlvbl9wZXJfdG93bl9kYXRhIDwtIGRkcGx5KHBvcHVsYXRpb24sIC4oQ09ER0VPKSwgZnVuY3Rpb24ocG9wdWxhdGlvbikgew0KICBkYXRhLmZyYW1lKHRvdGFsX3BvcHVsYXRpb24gPSBzdW0ocG9wdWxhdGlvbiRwZW9wbGVDYXRlZ051bSksDQogICAgICAgICAgICAgbWFsZSA9IHN1bShwb3B1bGF0aW9uW3BvcHVsYXRpb24kc2V4ID09ICJNYWxlIixdJHBlb3BsZUNhdGVnTnVtKSwNCiAgICAgICAgICAgICBmZW1hbGUgPSBzdW0ocG9wdWxhdGlvbltwb3B1bGF0aW9uJHNleCA9PSAiRmVtYWxlIixdJHBlb3BsZUNhdGVnTnVtKSwNCiAgICAgICAgICAgICBjaGlsZCA9IHN1bShwb3B1bGF0aW9uW3BvcHVsYXRpb24kYWdlQ2F0ZWc1ICVpbiUgc2VxKDAsIDEwLCBieT01KSxdJHBlb3BsZUNhdGVnTnVtKSwNCiAgICAgICAgICAgICBlbGRlcmx5ID0gc3VtKHBvcHVsYXRpb25bcG9wdWxhdGlvbiRhZ2VDYXRlZzUgJWluJSBzZXEoNjUsIDgwLCBieT01KSxdJHBlb3BsZUNhdGVnTnVtKSwNCiAgICAgICAgICAgICB3b3JrZm9yY2UgPSBzdW0ocG9wdWxhdGlvbltwb3B1bGF0aW9uJGFnZUNhdGVnNSAlaW4lIHNlcSgxNSwgNjAsIGJ5PTUpLF0kcGVvcGxlQ2F0ZWdOdW0pIA0KICApfSkNCg0KcG9wdWxhdGlvbl9wZXJfdG93bl9kYXRhJGRlcGVuZGVudCA8LSBwb3B1bGF0aW9uX3Blcl90b3duX2RhdGEkY2hpbGQgKyBwb3B1bGF0aW9uX3Blcl90b3duX2RhdGEkZWxkZXJseQ0KcG9wdWxhdGlvbl9wZXJfdG93bl9kYXRhJHNleF9yYXRpbyA8LSBpZmVsc2UocG9wdWxhdGlvbl9wZXJfdG93bl9kYXRhJGZlbWFsZT09MCwgMCwgcG9wdWxhdGlvbl9wZXJfdG93bl9kYXRhJG1hbGUgLyBwb3B1bGF0aW9uX3Blcl90b3duX2RhdGEkZmVtYWxlKQ0KcG9wdWxhdGlvbl9wZXJfdG93bl9kYXRhJGRlcGVuZGVuY3lfcmF0aW8gPC0gaWZlbHNlKHBvcHVsYXRpb25fcGVyX3Rvd25fZGF0YSR3b3JrZm9yY2U9PTAsIDAsIHBvcHVsYXRpb25fcGVyX3Rvd25fZGF0YSRkZXBlbmRlbnQgLyBwb3B1bGF0aW9uX3Blcl90b3duX2RhdGEkd29ya2ZvcmNlKQ0KcG9wdWxhdGlvbl9wZXJfdG93bl9kYXRhJGFnZWRfZGVwZW5kZW5jeV9yYXRpbyA8LSBpZmVsc2UocG9wdWxhdGlvbl9wZXJfdG93bl9kYXRhJHdvcmtmb3JjZT09MCwgMCwgcG9wdWxhdGlvbl9wZXJfdG93bl9kYXRhJGVsZGVybHkgLyBwb3B1bGF0aW9uX3Blcl90b3duX2RhdGEkd29ya2ZvcmNlKQ0KcG9wdWxhdGlvbl9wZXJfdG93bl9kYXRhJGNoaWxkX2RlcGVuZGVuY3lfcmF0aW8gPC0gaWZlbHNlKHBvcHVsYXRpb25fcGVyX3Rvd25fZGF0YSR3b3JrZm9yY2U9PTAsIDAsIHBvcHVsYXRpb25fcGVyX3Rvd25fZGF0YSRjaGlsZCAvIHBvcHVsYXRpb25fcGVyX3Rvd25fZGF0YSR3b3JrZm9yY2UpDQpzdW1tYXJ5KHBvcHVsYXRpb25fcGVyX3Rvd25fZGF0YSkNCmBgYA0KDQpgYGB7cn0NCiMgU2NhbGUgcG9wdWxhdGlvbiB0byBsb2cNCmhpc3QocG9wdWxhdGlvbl9wZXJfdG93bl9kYXRhJHRvdGFsX3BvcHVsYXRpb24sIHlsaW09YygwLDQwMDAwKSwgYnJlYWtzID0gc2VxKDAsIDI1MDAwMDAsIGJ5PTI1MDAwMCksIHhsYWI9IiIsIG1haW4gPSAiIiwgbGFiZWxzPVQsIGNvbCA9ImxpZ2h0IGJsdWUiKQ0KcG9wdWxhdGlvbl9wZXJfdG93bl9kYXRhJHRvdGFsX3BvcHVsYXRpb25fbG9nIDwtIGxvZzEwKHBvcHVsYXRpb25fcGVyX3Rvd25fZGF0YSR0b3RhbF9wb3B1bGF0aW9uKQ0KDQojIE1lcmdlIGdlbyBhbmQgcG9wDQpnZW9fcG9wX2J5X3Rvd24gPC0gbWVyZ2UoZ2VvLCBwb3B1bGF0aW9uX3Blcl90b3duX2RhdGEpDQpzdW1tYXJ5KGdlb19wb3BfYnlfdG93bikNCg0KDQojIFBsb3QgIkRpc3RyaWJ1dGlvbiBvZiBQb3B1bGF0aW9uIGZvciBlYWNoIFRvd24iDQojbXlQYWxldHRlKGxvdyA9ICJ3aGl0ZSIsIGhpZ2ggPSBjKCJncmVlbiIsICJyZWQiKSwgbWlkPU5VTEwsIGsgPTUwKS1OZWVkICJHTEFEIiBwYWNrYWdlDQpzYyA8LSBzY2FsZV9jb2xvdXJfZ3JhZGllbnRuKGNvbG91cnMgPXBhbGV0dGUocmFpbmJvdyg4KSksIGxpbWl0cz1jKG1pbihnZW9fcG9wX2J5X3Rvd24kdG90YWxfcG9wdWxhdGlvbl9sb2cpLCBtYXgoZ2VvX3BvcF9ieV90b3duJHRvdGFsX3BvcHVsYXRpb25fbG9nKSkpDQpwb3BwdWxhdGlvbl9kaXN0cmlidXRpb24gPC0gDQogIEZyYU1hcCArIA0KICBnZW9tX3BvaW50KGFlcyh4PWdlb19wb3BfYnlfdG93biRsb25naXR1ZGUsIHk9Z2VvX3BvcF9ieV90b3duJGxhdGl0dWRlLCBjb2xvdXI9Z2VvX3BvcF9ieV90b3duJHRvdGFsX3BvcHVsYXRpb25fbG9nKSwgDQogICAgICAgICAgICAgZGF0YT1nZW9fcG9wX2J5X3Rvd24sIGFscGhhPTAuOCwgc2l6ZT0wLjYpICsgDQogIHNjICsNCiAgZ2VvbV90ZXh0KGFlcyhsYWJlbCA9IHRvd25fbmFtZSwgeCA9IGxvbmdpdHVkZSwgeSA9IGxhdGl0dWRlKSwgDQogICAgICAgICAgICBkYXRhID0gc3Vic2V0KGdlb19wb3BfYnlfdG93biwgdG90YWxfcG9wdWxhdGlvbl9sb2cgJWluJSBoZWFkKHNvcnQodG90YWxfcG9wdWxhdGlvbl9sb2csIGRlY3JlYXNpbmc9VFJVRSksIDMpKSwNCiAgICAgICAgICAgIGNoZWNrX292ZXJsYXAgPSBUUlVFLCBzaXplPTcpICsNCiAgbGFicyhjb2xvcj0nVG90YWwgUG9wdWxhdGlvbiBpbiBMb2cnKSArDQogIGdndGl0bGUoIkRpc3RyaWJ1dGlvbiBvZiBQb3B1bGF0aW9uIGZvciBlYWNoIFRvd24iKSANCnBvcHB1bGF0aW9uX2Rpc3RyaWJ1dGlvbg0KYGBgDQoNCmBgYHtyfQ0KIyBHcm91cCBwb3B1bGF0aW9uIGRhdGEgYnkgZGVwYXJ0bWVudCBiZWNhdXNlIG9mIHNtYWxsIHNpemUgb2Ygc29tZSB0b3ducyBhbmQgdGhlIGdpdmVuIGdlb2pzb24gZmlsZSBvZiBkZXBhcnRtZW50DQpwb3BfYnlfZGVwYXJ0bWVudCA8LSBkZHBseShnZW9fcG9wX2J5X3Rvd24sIC4oZGVwYXJ0bWVudCksIGZ1bmN0aW9uKGdlb19wb3BfYnlfdG93bikgew0KICBkYXRhLmZyYW1lKHRvdGFsX3BvcHVsYXRpb24gPSBzdW0oZ2VvX3BvcF9ieV90b3duJHRvdGFsX3BvcHVsYXRpb24pLA0KICAgICAgICAgICAgIG1hbGUgPSBzdW0oZ2VvX3BvcF9ieV90b3duJG1hbGUpLA0KICAgICAgICAgICAgIGZlbWFsZSA9IHN1bShnZW9fcG9wX2J5X3Rvd24kZmVtYWxlKSwNCiAgICAgICAgICAgICBjaGlsZCA9IHN1bShnZW9fcG9wX2J5X3Rvd24kY2hpbGQpLA0KICAgICAgICAgICAgIGVsZGVybHkgPSBzdW0oZ2VvX3BvcF9ieV90b3duJGVsZGVybHkpLA0KICAgICAgICAgICAgIGRlcGVuZGVudCA9IHN1bShnZW9fcG9wX2J5X3Rvd24kZGVwZW5kZW50KSwNCiAgICAgICAgICAgICB3b3JrZm9yY2UgPSBzdW0oZ2VvX3BvcF9ieV90b3duJHdvcmtmb3JjZSkgDQogICl9KQ0KDQpzdW1tYXJ5KHBvcF9ieV9kZXBhcnRtZW50KQ0KcG9wX2J5X2RlcGFydG1lbnQkZGVwZW5kZW5jeV9yYXRpbyA8LSBwb3BfYnlfZGVwYXJ0bWVudCRkZXBlbmRlbnQgLyBwb3BfYnlfZGVwYXJ0bWVudCR3b3JrZm9yY2UNCnBvcF9ieV9kZXBhcnRtZW50JGFnZWRfZGVwZW5kZW5jeV9yYXRpbyA8LSBwb3BfYnlfZGVwYXJ0bWVudCRlbGRlcmx5IC8gcG9wX2J5X2RlcGFydG1lbnQkd29ya2ZvcmNlDQpwb3BfYnlfZGVwYXJ0bWVudCRjaGlsZF9kZXBlbmRlbmN5X3JhdGlvIDwtIHBvcF9ieV9kZXBhcnRtZW50JGNoaWxkIC8gcG9wX2J5X2RlcGFydG1lbnQkd29ya2ZvcmNlDQpgYGANCg0KYGBge3J9DQojIFNjYWxlIHBvcHVsYXRpb24gdG8gbG9nDQpwb3BfYnlfZGVwYXJ0bWVudCR0b3RhbF9wb3B1bGF0aW9uX2xvZyA8LSBsb2cxMChwb3BfYnlfZGVwYXJ0bWVudCR0b3RhbF9wb3B1bGF0aW9uKQ0KDQojIE1lcmdlIGdlbyBhbmQgcG9wDQpnZW9fcG9wX2J5X2RlcGFydG1lbnQgPC0gbWVyZ2UoZ2VvLCBwb3BfYnlfZGVwYXJ0bWVudCkNCnN1bW1hcnkoZ2VvX3BvcF9ieV9kZXBhcnRtZW50KQ0KYGBgDQoNCg0KYGBge3J9DQojIFBsb3QgIkRpc3RyaWJ1dGlvbiBvZiBQb3B1bGF0aW9uIGZvciBlYWNoIGRlcGFydG1lbnQiDQojbXlQYWxldHRlKGxvdyA9ICJ3aGl0ZSIsIGhpZ2ggPSBjKCJncmVlbiIsICJyZWQiKSwgbWlkPU5VTEwsIGsgPTUwKS1OZWVkICJHTEFEIiBwYWNrYWdlDQpzYyA8LSBzY2FsZV9jb2xvdXJfZ3JhZGllbnRuKGNvbG91cnMgPXBhbGV0dGUocmFpbmJvdyg4KSksIGxpbWl0cz1jKG1pbihnZW9fcG9wX2J5X2RlcGFydG1lbnQkdG90YWxfcG9wdWxhdGlvbl9sb2cpLCBtYXgoZ2VvX3BvcF9ieV9kZXBhcnRtZW50JHRvdGFsX3BvcHVsYXRpb25fbG9nKSkpDQpwb3BfZGlzdHJpYnV0aW9uX2RlcGFydG1lbnQgPC0gDQogIEZyYU1hcCArIA0KICBnZW9tX3BvaW50KGFlcyh4PWdlb19wb3BfYnlfZGVwYXJ0bWVudCRsb25naXR1ZGUsIHk9Z2VvX3BvcF9ieV9kZXBhcnRtZW50JGxhdGl0dWRlLCBjb2xvdXI9Z2VvX3BvcF9ieV9kZXBhcnRtZW50JHRvdGFsX3BvcHVsYXRpb25fbG9nKSwgDQogICAgICAgICAgICAgZGF0YT1nZW9fcG9wX2J5X2RlcGFydG1lbnQsIGFscGhhPTAuOCwgc2l6ZT0wLjYpICsgDQogIHNjICsNCiAgZ2VvbV90ZXh0KGFlcyhsYWJlbCA9IHRvd25fbmFtZSwgeCA9IGxvbmdpdHVkZSwgeSA9IGxhdGl0dWRlKSwgDQogICAgICAgICAgICBkYXRhID0gc3Vic2V0KGdlb19wb3BfYnlfZGVwYXJ0bWVudCwgdG90YWxfcG9wdWxhdGlvbl9sb2cgJWluJSBoZWFkKHNvcnQodG90YWxfcG9wdWxhdGlvbl9sb2csIGRlY3JlYXNpbmc9VFJVRSksIDMpKSwNCiAgICAgICAgICAgIGNoZWNrX292ZXJsYXAgPSBUUlVFLCBzaXplPTcpICsNCiAgbGFicyhjb2xvcj0nVG90YWwgUG9wdWxhdGlvbiBpbiBMb2cnKSArDQogIGdndGl0bGUoIkRpc3RyaWJ1dGlvbiBvZiBQb3B1bGF0aW9uIGZvciBlYWNoIGRlcGFydG1lbnQiKSANCnBvcF9kaXN0cmlidXRpb25fZGVwYXJ0bWVudA0KYGBgDQoNCg0KDQojIFByb2R1Y2UgY29uc2lzdGVudCBkYXRhc2V0cyAjDQoNCiANCmBgYHtyfQ0KDQojIHVzZSBvbmx5IGludGVnZXIgdmFsdWVzDQpnZW8kQ09ER0VPID0gYXMuaW50ZWdlcihnZW8kQ09ER0VPKSAgIyBhbHJlYWR5IGludGVnZXINCnBvcHVsYXRpb24kQ09ER0VPID0gYXMuaW50ZWdlcihwb3B1bGF0aW9uJENPREdFTykNCmZpcm1zJENPREdFTyA9IGFzLmludGVnZXIoZmlybXMkQ09ER0VPKQ0Kc2FsYXJ5JENPREdFTyA9IGFzLmludGVnZXIoc2FsYXJ5JENPREdFTykNCg0KIyBpbnN0YWxsLnBhY2thZ2VzKCJkcGx5ciIpDQpsaWJyYXJ5KGRwbHlyKQ0KZGF0YXNldCA9IGMoInBvcHVsYXRpb24iLCAic2FsYXJ5IiwgImZpcm1zIiwgImdlbyIpDQoNCiMgb2J0YWluIHNvbW1vbiBJRHMgZm9yIGFsbCBkYXRhc2V0cw0KZm9yIChpIGluIGRhdGFzZXQpew0KICAjIGdldCBpLXRoIG5hbWUgYW5kIG1ha2UgYSBuZXcgdmFyaWFibGUgYWRkaW5nIE5FVw0KICBuYW0gPC0gcGFzdGUoaSwgIk5FVyIsIHNlcCA9ICIiKQ0KICAjIGNvdW50ZXIgdG8gaWRlbnRpZnkgdGhlIG51bWJlciBvZiBpdGVyYXRpb24gaW4gag0KICBpdGVyID0gMQ0KICBmb3IgKGogaW4gZGF0YXNldCl7DQogICAgaWYgKGogIT0gaSl7DQogICAgICAjIGRhdGFzZXRzIGRpZmZlcmVudCBmcm9tIGktdGgNCiAgICAgIGlmIChpdGVyID09IDEpew0KICAgICAgICAjIDFzdCBpdGVyYXRpb246IHVzZSB0aGUgb3JpZ2luYWwgZGF0YXNldCAoZS5nLiwgZ2VvKQ0KICAgICAgICBhc3NpZ24obmFtLCBzZW1pX2pvaW4oZ2V0KGkpLCBnZXQoaiksIGJ5ID0gIkNPREdFTyIpKQ0KICAgICAgfSBlbHNlew0KICAgICAgICAjIHN1Y2Nlc3NpdmUgaXRlcmF0aW9uOiB1c2UgdGhlIG5ldyBkYXRhc2V0IChlLmcuLCBnZW9ORVcpDQogICAgICAgIGFzc2lnbihuYW0sIHNlbWlfam9pbihnZXQobmFtKSwgZ2V0KGopLCBieSA9ICJDT0RHRU8iKSkNCiAgICAgIH0NCiAgICAgIGl0ZXIgPSBpdGVyICsgMQ0KICAgIH0NCiAgfQ0KfQ0KDQojIGNoZWNrIGhvdyBtYW55IG9ic2VydmF0aW9uIGhhdmUgYmVlbiBkZWxldGVkDQpmb3IgKGkgaW4gZGF0YXNldCl7DQogIGRlbF9yb3dzID0gbnJvdyhnZXQoaSkpIC0gbnJvdyhnZXQocGFzdGUoaSwgIk5FVyIsIHNlcCA9ICIiKSkpDQogIGRlbF9wcm9wID0gZGVsX3Jvd3MgLyBucm93KGdldChwYXN0ZShpLCAiTkVXIiwgc2VwID0gIiIpKSkNCiAgZGVsX29icyA9IHBhc3RlKCJGb3IiLCBpLCBkZWxfcm93cywgImhhdmUgYmVlbiBkZWxldGVkLiIsDQogICAgICAgICAgICAgICAgICAiVGhleSB3ZXJlIHRoZSIsIHJvdW5kKGRlbF9wcm9wLCBkaWdpdHM9MiksICIlIG9mIHRoZSB0b3RhbC4iLCBzZXAgPSAiICIpDQogIHByaW50KGRlbF9vYnMpDQp9DQoNCmBgYA0KDQojIEFuYWx5c2lzICMNCg0KIyMgUENBICMjDQoNCiMjIFJlZ3Jlc3Npb24gIyMNCg0KIyMgQ2x1c3RlcmluZyAjIw0KDQoNCg==